Diffstat (limited to 'kernel'): 60 files changed, 2667 insertions, 1498 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 5404911eaee9..0dfeca4324ee 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -54,6 +54,7 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
 obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
 obj-$(CONFIG_UID16) += uid16.o
 obj-$(CONFIG_MODULES) += module.o
+obj-$(CONFIG_MODULE_SIG) += module_signing.o modsign_pubkey.o
 obj-$(CONFIG_KALLSYMS) += kallsyms.o
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
 obj-$(CONFIG_KEXEC) += kexec.o
@@ -130,3 +131,79 @@ quiet_cmd_timeconst = TIMEC $@
 targets += timeconst.h
 $(obj)/timeconst.h: $(src)/timeconst.pl FORCE
 	$(call if_changed,timeconst)
+
+ifeq ($(CONFIG_MODULE_SIG),y)
+#
+# Pull the signing certificate and any extra certificates into the kernel
+#
+extra_certificates:
+	touch $@
+
+kernel/modsign_pubkey.o: signing_key.x509 extra_certificates
+
+###############################################################################
+#
+# If module signing is requested, say by allyesconfig, but a key has not been
+# supplied, then one will need to be generated to make sure the build does not
+# fail and that the kernel may be used afterwards.
+#
+###############################################################################
+sign_key_with_hash :=
+ifeq ($(CONFIG_MODULE_SIG_SHA1),y)
+sign_key_with_hash := -sha1
+endif
+ifeq ($(CONFIG_MODULE_SIG_SHA224),y)
+sign_key_with_hash := -sha224
+endif
+ifeq ($(CONFIG_MODULE_SIG_SHA256),y)
+sign_key_with_hash := -sha256
+endif
+ifeq ($(CONFIG_MODULE_SIG_SHA384),y)
+sign_key_with_hash := -sha384
+endif
+ifeq ($(CONFIG_MODULE_SIG_SHA512),y)
+sign_key_with_hash := -sha512
+endif
+ifeq ($(sign_key_with_hash),)
+$(error Could not determine digest type to use from kernel config)
+endif
+
+signing_key.priv signing_key.x509: x509.genkey
+	@echo "###"
+	@echo "### Now generating an X.509 key pair to be used for signing modules."
+	@echo "###"
+	@echo "### If this takes a long time, you might wish to run rngd in the"
+	@echo "### background to keep the supply of entropy topped up.  It"
+	@echo "### needs to be run as root, and should use a hardware random"
+	@echo "### number generator if one is available, eg:"
+	@echo "###"
+	@echo "###     rngd -r /dev/hwrandom"
+	@echo "###"
+	openssl req -new -nodes -utf8 $(sign_key_with_hash) -days 36500 -batch \
+		-x509 -config x509.genkey \
+		-outform DER -out signing_key.x509 \
+		-keyout signing_key.priv
+	@echo "###"
+	@echo "### Key pair generated."
+	@echo "###"
+
+x509.genkey:
+	@echo Generating X.509 key generation config
+	@echo  >x509.genkey "[ req ]"
+	@echo >>x509.genkey "default_bits = 4096"
+	@echo >>x509.genkey "distinguished_name = req_distinguished_name"
+	@echo >>x509.genkey "prompt = no"
+	@echo >>x509.genkey "string_mask = utf8only"
+	@echo >>x509.genkey "x509_extensions = myexts"
+	@echo >>x509.genkey
+	@echo >>x509.genkey "[ req_distinguished_name ]"
+	@echo >>x509.genkey "O = Magrathea"
+	@echo >>x509.genkey "CN = Glacier signing key"
+	@echo >>x509.genkey "emailAddress = slartibartfast@magrathea.h2g2"
+	@echo >>x509.genkey
+	@echo >>x509.genkey "[ myexts ]"
+	@echo >>x509.genkey "basicConstraints=critical,CA:FALSE"
+	@echo >>x509.genkey "keyUsage=digitalSignature"
+	@echo >>x509.genkey "subjectKeyIdentifier=hash"
+	@echo >>x509.genkey "authorityKeyIdentifier=keyid"
+endif
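The Makefile rules above produce signing_key.x509 as a DER blob and make kernel/modsign_pubkey.o depend on it plus an (empty) extra_certificates file. The C side that consumes those blobs is not part of this excerpt; as a hedged illustration only, one common way to embed such DER files into an object so that boot code can register them on a keyring is an asm .incbin bracketed by start/end symbols:

/* Hedged sketch, not the code from this commit: embed the DER blobs the
 * Makefile generates between two linker-visible symbols. */
__asm__(".section .init.data,\"aw\"\n"
	"modsign_certificate_list:\n"
	".incbin \"signing_key.x509\"\n"
	".incbin \"extra_certificates\"\n"
	"modsign_certificate_list_end:");

extern const unsigned char modsign_certificate_list[];
extern const unsigned char modsign_certificate_list_end[];

/* Boot-time code could then walk every X.509 certificate found in
 * [modsign_certificate_list, modsign_certificate_list_end) and add it
 * to the module signing keyring. The symbol names here are assumptions
 * for illustration, not taken from this diff. */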
diff --git a/kernel/acct.c b/kernel/acct.c
index 02e6167a53b0..051e071a06e7 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -193,7 +193,7 @@ static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file,
 	}
 }
 
-static int acct_on(char *name)
+static int acct_on(struct filename *pathname)
 {
 	struct file *file;
 	struct vfsmount *mnt;
@@ -201,7 +201,7 @@ static int acct_on(char *name)
 	struct bsd_acct_struct *acct = NULL;
 
 	/* Difference from BSD - they don't do O_APPEND */
-	file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
+	file = file_open_name(pathname, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
@@ -260,7 +260,7 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
 		return -EPERM;
 
 	if (name) {
-		char *tmp = getname(name);
+		struct filename *tmp = getname(name);
 		if (IS_ERR(tmp))
 			return (PTR_ERR(tmp));
 		error = acct_on(tmp);
@@ -507,8 +507,8 @@ static void do_acct_process(struct bsd_acct_struct *acct,
 	do_div(elapsed, AHZ);
 	ac.ac_btime = get_seconds() - elapsed;
 	/* we really need to bite the bullet and change layout */
-	ac.ac_uid = orig_cred->uid;
-	ac.ac_gid = orig_cred->gid;
+	ac.ac_uid = from_kuid_munged(file->f_cred->user_ns, orig_cred->uid);
+	ac.ac_gid = from_kgid_munged(file->f_cred->user_ns, orig_cred->gid);
 #if ACCT_VERSION==2
 	ac.ac_ahz = AHZ;
 #endif
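The do_acct_process() change stops copying kernel-internal kuid_t/kgid_t values straight into the on-disk record and instead translates them with from_kuid_munged()/from_kgid_munged() against the user namespace of whoever opened the accounting file (file->f_cred->user_ns), falling back to the overflow ID when no mapping exists. A minimal userspace model of that translation, assuming a single uid_map extent and the conventional 65534 overflow value (both are illustrative assumptions, not kernel code):

#include <stdio.h>

/* One uid_map extent: IDs [lower, lower+count) in the namespace map to
 * [base, base+count) outside it. make_kuid()/from_kuid_munged() walk
 * tables of such extents; unmappable IDs come back as the overflow UID. */
struct extent { unsigned lower, base, count; };

static long to_kernel_uid(struct extent m, unsigned ns_uid)       /* ~make_kuid() */
{
	return (ns_uid >= m.lower && ns_uid < m.lower + m.count)
		? (long)(m.base + (ns_uid - m.lower)) : -1;       /* -1 ~ INVALID_UID */
}

static unsigned to_ns_uid_munged(struct extent m, unsigned kuid)  /* ~from_kuid_munged() */
{
	return (kuid >= m.base && kuid < m.base + m.count)
		? m.lower + (kuid - m.base) : 65534;              /* overflowuid */
}

int main(void)
{
	struct extent m = { .lower = 0, .base = 100000, .count = 65536 };
	printf("%ld\n", to_kernel_uid(m, 1000));      /* 101000 */
	printf("%u\n", to_ns_uid_munged(m, 101000));  /* 1000 */
	printf("%u\n", to_ns_uid_munged(m, 5));       /* 65534: not mapped */
	return 0;
}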
diff --git a/kernel/audit.c b/kernel/audit.c
index ea3b7b6191c7..40414e9143db 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -61,6 +61,7 @@
 #include <linux/netlink.h>
 #include <linux/freezer.h>
 #include <linux/tty.h>
+#include <linux/pid_namespace.h>
 
 #include "audit.h"
 
@@ -87,11 +88,11 @@ static int audit_failure = AUDIT_FAIL_PRINTK;
 
 /*
  * If audit records are to be written to the netlink socket, audit_pid
- * contains the pid of the auditd process and audit_nlk_pid contains
- * the pid to use to send netlink messages to that process.
+ * contains the pid of the auditd process and audit_nlk_portid contains
+ * the portid to use to send netlink messages to that process.
  */
 int audit_pid;
-static int audit_nlk_pid;
+static int audit_nlk_portid;
 
 /* If audit_rate_limit is non-zero, limit the rate of sending audit records
  * to that number per second. This prevents DoS attacks, but results in
@@ -104,7 +105,7 @@ static int audit_backlog_wait_time = 60 * HZ;
 static int audit_backlog_wait_overflow = 0;
 
 /* The identity of the user shutting down the audit system. */
-uid_t audit_sig_uid = -1;
+kuid_t audit_sig_uid = INVALID_UID;
 pid_t audit_sig_pid = -1;
 u32 audit_sig_sid = 0;
 
@@ -264,7 +265,7 @@ void audit_log_lost(const char *message)
 }
 
 static int audit_log_config_change(char *function_name, int new, int old,
-				   uid_t loginuid, u32 sessionid, u32 sid,
+				   kuid_t loginuid, u32 sessionid, u32 sid,
 				   int allow_changes)
 {
 	struct audit_buffer *ab;
@@ -272,7 +273,7 @@ static int audit_log_config_change(char *function_name, int new, int old,
 
 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
 	audit_log_format(ab, "%s=%d old=%d auid=%u ses=%u", function_name, new,
-			 old, loginuid, sessionid);
+			 old, from_kuid(&init_user_ns, loginuid), sessionid);
 	if (sid) {
 		char *ctx = NULL;
 		u32 len;
@@ -292,7 +293,7 @@ static int audit_log_config_change(char *function_name, int new, int old,
 }
 
 static int audit_do_config_change(char *function_name, int *to_change,
-				  int new, uid_t loginuid, u32 sessionid,
+				  int new, kuid_t loginuid, u32 sessionid,
 				  u32 sid)
 {
 	int allow_changes, rc = 0, old = *to_change;
@@ -319,21 +320,21 @@ static int audit_do_config_change(char *function_name, int *to_change,
 	return rc;
 }
 
-static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sessionid,
+static int audit_set_rate_limit(int limit, kuid_t loginuid, u32 sessionid,
 				u32 sid)
 {
 	return audit_do_config_change("audit_rate_limit", &audit_rate_limit,
 				      limit, loginuid, sessionid, sid);
 }
 
-static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sessionid,
+static int audit_set_backlog_limit(int limit, kuid_t loginuid, u32 sessionid,
 				   u32 sid)
 {
 	return audit_do_config_change("audit_backlog_limit", &audit_backlog_limit,
 				      limit, loginuid, sessionid, sid);
 }
 
-static int audit_set_enabled(int state, uid_t loginuid, u32 sessionid, u32 sid)
+static int audit_set_enabled(int state, kuid_t loginuid, u32 sessionid, u32 sid)
 {
 	int rc;
 	if (state < AUDIT_OFF || state > AUDIT_LOCKED)
@@ -348,7 +349,7 @@ static int audit_set_enabled(int state, uid_t loginuid, u32 sessionid, u32 sid)
 	return rc;
 }
 
-static int audit_set_failure(int state, uid_t loginuid, u32 sessionid, u32 sid)
+static int audit_set_failure(int state, kuid_t loginuid, u32 sessionid, u32 sid)
 {
 	if (state != AUDIT_FAIL_SILENT
 	    && state != AUDIT_FAIL_PRINTK
@@ -401,7 +402,7 @@ static void kauditd_send_skb(struct sk_buff *skb)
 	int err;
 	/* take a reference in case we can't send it and we want to hold it */
 	skb_get(skb);
-	err = netlink_unicast(audit_sock, skb, audit_nlk_pid, 0);
+	err = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0);
 	if (err < 0) {
 		BUG_ON(err != -ECONNREFUSED); /* Shouldn't happen */
 		printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid);
@@ -467,24 +468,6 @@ static int kauditd_thread(void *dummy)
 	return 0;
 }
 
-static int audit_prepare_user_tty(pid_t pid, uid_t loginuid, u32 sessionid)
-{
-	struct task_struct *tsk;
-	int err;
-
-	rcu_read_lock();
-	tsk = find_task_by_vpid(pid);
-	if (!tsk) {
-		rcu_read_unlock();
-		return -ESRCH;
-	}
-	get_task_struct(tsk);
-	rcu_read_unlock();
-	err = tty_audit_push_task(tsk, loginuid, sessionid);
-	put_task_struct(tsk);
-	return err;
-}
-
 int audit_send_list(void *_dest)
 {
 	struct audit_netlink_list *dest = _dest;
@@ -588,6 +571,11 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
 {
 	int err = 0;
 
+	/* Only support the initial namespaces for now. */
+	if ((current_user_ns() != &init_user_ns) ||
+	    (task_active_pid_ns(current) != &init_pid_ns))
+		return -EPERM;
+
 	switch (msg_type) {
 	case AUDIT_GET:
 	case AUDIT_LIST:
@@ -619,8 +607,7 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
 }
 
 static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type,
-				     u32 pid, u32 uid, uid_t auid, u32 ses,
-				     u32 sid)
+				     kuid_t auid, u32 ses, u32 sid)
 {
 	int rc = 0;
 	char *ctx = NULL;
@@ -633,7 +620,9 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type,
 
 	*ab = audit_log_start(NULL, GFP_KERNEL, msg_type);
 	audit_log_format(*ab, "pid=%d uid=%u auid=%u ses=%u",
-			 pid, uid, auid, ses);
+			 task_tgid_vnr(current),
+			 from_kuid(&init_user_ns, current_uid()),
+			 from_kuid(&init_user_ns, auid), ses);
 	if (sid) {
 		rc = security_secid_to_secctx(sid, &ctx, &len);
 		if (rc)
@@ -649,13 +638,13 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type,
 
 static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-	u32 uid, pid, seq, sid;
+	u32 seq, sid;
 	void *data;
 	struct audit_status *status_get, status_set;
 	int err;
 	struct audit_buffer *ab;
 	u16 msg_type = nlh->nlmsg_type;
-	uid_t loginuid; /* loginuid of sender */
+	kuid_t loginuid; /* loginuid of sender */
 	u32 sessionid;
 	struct audit_sig_info *sig_data;
 	char *ctx = NULL;
@@ -675,8 +664,6 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		return err;
 	}
 
-	pid = NETLINK_CREDS(skb)->pid;
-	uid = NETLINK_CREDS(skb)->uid;
 	loginuid = audit_get_loginuid(current);
 	sessionid = audit_get_sessionid(current);
 	security_task_getsecid(current, &sid);
@@ -692,7 +679,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		status_set.backlog_limit = audit_backlog_limit;
 		status_set.lost = atomic_read(&audit_lost);
 		status_set.backlog = skb_queue_len(&audit_skb_queue);
-		audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_GET, 0, 0,
+		audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_GET, 0, 0,
 				 &status_set, sizeof(status_set));
 		break;
 	case AUDIT_SET:
@@ -720,7 +707,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 						sessionid, sid, 1);
 
 			audit_pid = new_pid;
-			audit_nlk_pid = NETLINK_CB(skb).pid;
+			audit_nlk_portid = NETLINK_CB(skb).portid;
 		}
 		if (status_get->mask & AUDIT_STATUS_RATE_LIMIT) {
 			err = audit_set_rate_limit(status_get->rate_limit,
@@ -738,16 +725,16 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		if (!audit_enabled && msg_type != AUDIT_USER_AVC)
 			return 0;
 
-		err = audit_filter_user(&NETLINK_CB(skb));
+		err = audit_filter_user();
 		if (err == 1) {
 			err = 0;
 			if (msg_type == AUDIT_USER_TTY) {
-				err = audit_prepare_user_tty(pid, loginuid,
+				err = tty_audit_push_task(current, loginuid,
 							     sessionid);
 				if (err)
 					break;
 			}
-			audit_log_common_recv_msg(&ab, msg_type, pid, uid,
+			audit_log_common_recv_msg(&ab, msg_type,
 						  loginuid, sessionid, sid);
 
 			if (msg_type != AUDIT_USER_TTY)
@@ -763,7 +750,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 					size--;
 				audit_log_n_untrustedstring(ab, data, size);
 			}
-			audit_set_pid(ab, pid);
+			audit_set_pid(ab, NETLINK_CB(skb).portid);
 			audit_log_end(ab);
 		}
 		break;
@@ -772,8 +759,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		if (nlmsg_len(nlh) < sizeof(struct audit_rule))
 			return -EINVAL;
 		if (audit_enabled == AUDIT_LOCKED) {
-			audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, pid,
-						  uid, loginuid, sessionid, sid);
+			audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE,
+						  loginuid, sessionid, sid);
 
 			audit_log_format(ab, " audit_enabled=%d res=0",
 					 audit_enabled);
@@ -782,8 +769,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		}
 		/* fallthrough */
 	case AUDIT_LIST:
-		err = audit_receive_filter(msg_type, NETLINK_CB(skb).pid,
-					   uid, seq, data, nlmsg_len(nlh),
+		err = audit_receive_filter(msg_type, NETLINK_CB(skb).portid,
+					   seq, data, nlmsg_len(nlh),
 					   loginuid, sessionid, sid);
 		break;
 	case AUDIT_ADD_RULE:
@@ -791,8 +778,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		if (nlmsg_len(nlh) < sizeof(struct audit_rule_data))
 			return -EINVAL;
 		if (audit_enabled == AUDIT_LOCKED) {
-			audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, pid,
-						  uid, loginuid, sessionid, sid);
+			audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE,
						  loginuid, sessionid, sid);
 
 			audit_log_format(ab, " audit_enabled=%d res=0",
 					 audit_enabled);
@@ -801,15 +788,15 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		}
 		/* fallthrough */
 	case AUDIT_LIST_RULES:
-		err = audit_receive_filter(msg_type, NETLINK_CB(skb).pid,
-					   uid, seq, data, nlmsg_len(nlh),
+		err = audit_receive_filter(msg_type, NETLINK_CB(skb).portid,
+					   seq, data, nlmsg_len(nlh),
 					   loginuid, sessionid, sid);
 		break;
 	case AUDIT_TRIM:
 		audit_trim_trees();
 
-		audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, pid,
-					  uid, loginuid, sessionid, sid);
+		audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE,
+					  loginuid, sessionid, sid);
 
 		audit_log_format(ab, " op=trim res=1");
 		audit_log_end(ab);
@@ -840,8 +827,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		/* OK, here comes... */
 		err = audit_tag_tree(old, new);
 
-		audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE, pid,
-					  uid, loginuid, sessionid, sid);
+		audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE,
+					  loginuid, sessionid, sid);
 
 		audit_log_format(ab, " op=make_equiv old=");
 		audit_log_untrustedstring(ab, old);
@@ -866,53 +853,41 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 			security_release_secctx(ctx, len);
 			return -ENOMEM;
 		}
-		sig_data->uid = audit_sig_uid;
+		sig_data->uid = from_kuid(&init_user_ns, audit_sig_uid);
 		sig_data->pid = audit_sig_pid;
 		if (audit_sig_sid) {
 			memcpy(sig_data->ctx, ctx, len);
 			security_release_secctx(ctx, len);
 		}
-		audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_SIGNAL_INFO,
+		audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_SIGNAL_INFO,
 				 0, 0, sig_data, sizeof(*sig_data) + len);
 		kfree(sig_data);
 		break;
 	case AUDIT_TTY_GET: {
 		struct audit_tty_status s;
-		struct task_struct *tsk;
-		unsigned long flags;
-
-		rcu_read_lock();
-		tsk = find_task_by_vpid(pid);
-		if (tsk && lock_task_sighand(tsk, &flags)) {
-			s.enabled = tsk->signal->audit_tty != 0;
-			unlock_task_sighand(tsk, &flags);
-		} else
-			err = -ESRCH;
-		rcu_read_unlock();
-
-		if (!err)
-			audit_send_reply(NETLINK_CB(skb).pid, seq,
-					 AUDIT_TTY_GET, 0, 0, &s, sizeof(s));
+		struct task_struct *tsk = current;
+
+		spin_lock_irq(&tsk->sighand->siglock);
+		s.enabled = tsk->signal->audit_tty != 0;
+		spin_unlock_irq(&tsk->sighand->siglock);
+
+		audit_send_reply(NETLINK_CB(skb).portid, seq,
+				 AUDIT_TTY_GET, 0, 0, &s, sizeof(s));
 		break;
 	}
 	case AUDIT_TTY_SET: {
 		struct audit_tty_status *s;
-		struct task_struct *tsk;
-		unsigned long flags;
+		struct task_struct *tsk = current;
 
 		if (nlh->nlmsg_len < sizeof(struct audit_tty_status))
 			return -EINVAL;
 		s = data;
 		if (s->enabled != 0 && s->enabled != 1)
 			return -EINVAL;
-		rcu_read_lock();
-		tsk = find_task_by_vpid(pid);
-		if (tsk && lock_task_sighand(tsk, &flags)) {
-			tsk->signal->audit_tty = s->enabled != 0;
-			unlock_task_sighand(tsk, &flags);
-		} else
-			err = -ESRCH;
-		rcu_read_unlock();
+
+		spin_lock_irq(&tsk->sighand->siglock);
+		tsk->signal->audit_tty = s->enabled != 0;
+		spin_unlock_irq(&tsk->sighand->siglock);
 		break;
 	}
 	default:
@@ -971,8 +946,7 @@ static int __init audit_init(void)
 
 	printk(KERN_INFO "audit: initializing netlink socket (%s)\n",
 	       audit_default ? "enabled" : "disabled");
-	audit_sock = netlink_kernel_create(&init_net, NETLINK_AUDIT,
-					   THIS_MODULE, &cfg);
+	audit_sock = netlink_kernel_create(&init_net, NETLINK_AUDIT, &cfg);
 	if (!audit_sock)
 		audit_panic("cannot initialize netlink socket");
 	else
@@ -1466,6 +1440,8 @@ void audit_log_link_denied(const char *operation, struct path *link)
 
 	ab = audit_log_start(current->audit_context, GFP_KERNEL,
 			     AUDIT_ANOM_LINK);
+	if (!ab)
+		return;
 	audit_log_format(ab, "op=%s action=denied", operation);
 	audit_log_format(ab, " pid=%d comm=", current->pid);
 	audit_log_untrustedstring(ab, current->comm);
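Two patterns recur throughout the audit.c changes: audit_log_start() can return NULL (the new check in audit_log_link_denied() guards against exactly that), and any kuid_t now has to be converted back to a plain uid with from_kuid() against the initial user namespace before it is formatted into a record. A minimal sketch of that combined pattern, assuming kernel context and a hypothetical helper name log_example():

/* Hedged sketch of the logging idiom this diff converges on; the helper
 * name and message type are illustrative only. */
static void log_example(kuid_t auid, unsigned int sessionid)
{
	struct audit_buffer *ab;

	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
	if (!ab)
		return;		/* mirrors the new NULL check in audit_log_link_denied() */
	audit_log_format(ab, "auid=%u ses=%u",
			 from_kuid(&init_user_ns, auid), sessionid);
	audit_log_end(ab);
}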
diff --git a/kernel/audit.h b/kernel/audit.h
index 816766803371..d51cba868e1b 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -74,10 +74,15 @@ static inline int audit_hash_ino(u32 ino)
 	return (ino & (AUDIT_INODE_BUCKETS-1));
 }
 
+/* Indicates that audit should log the full pathname. */
+#define AUDIT_NAME_FULL -1
+
 extern int audit_match_class(int class, unsigned syscall);
 extern int audit_comparator(const u32 left, const u32 op, const u32 right);
-extern int audit_compare_dname_path(const char *dname, const char *path,
-				    int *dirlen);
+extern int audit_uid_comparator(kuid_t left, u32 op, kuid_t right);
+extern int audit_gid_comparator(kgid_t left, u32 op, kgid_t right);
+extern int parent_len(const char *path);
+extern int audit_compare_dname_path(const char *dname, const char *path, int plen);
 extern struct sk_buff * audit_make_reply(int pid, int seq, int type,
 					 int done, int multi,
 					 const void *payload, int size);
@@ -144,7 +149,7 @@ extern void audit_kill_trees(struct list_head *);
 extern char *audit_unpack_string(void **, size_t *, size_t);
 
 extern pid_t audit_sig_pid;
-extern uid_t audit_sig_uid;
+extern kuid_t audit_sig_uid;
 extern u32 audit_sig_sid;
 
 #ifdef CONFIG_AUDITSYSCALL
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 3823281401b5..9a9ae6e3d290 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -241,7 +241,7 @@ static void audit_watch_log_rule_change(struct audit_krule *r, struct audit_watc
 		struct audit_buffer *ab;
 		ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
 		audit_log_format(ab, "auid=%u ses=%u op=",
-				 audit_get_loginuid(current),
+				 from_kuid(&init_user_ns, audit_get_loginuid(current)),
 				 audit_get_sessionid(current));
 		audit_log_string(ab, op);
 		audit_log_format(ab, " path=");
@@ -265,7 +265,8 @@ static void audit_update_watch(struct audit_parent *parent,
 	/* Run all of the watches on this parent looking for the one that
 	 * matches the given dname */
 	list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) {
-		if (audit_compare_dname_path(dname, owatch->path, NULL))
+		if (audit_compare_dname_path(dname, owatch->path,
+					     AUDIT_NAME_FULL))
 			continue;
 
 		/* If the update involves invalidating rules, do the inode-based
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index a6c3f1abd206..7f19f23d38a3 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -342,6 +342,8 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 
 		f->type = rule->fields[i] & ~(AUDIT_NEGATE|AUDIT_OPERATORS);
 		f->val = rule->values[i];
+		f->uid = INVALID_UID;
+		f->gid = INVALID_GID;
 
 		err = -EINVAL;
 		if (f->op == Audit_bad)
@@ -350,16 +352,32 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 		switch(f->type) {
 		default:
 			goto exit_free;
-		case AUDIT_PID:
 		case AUDIT_UID:
 		case AUDIT_EUID:
 		case AUDIT_SUID:
 		case AUDIT_FSUID:
+		case AUDIT_LOGINUID:
+			/* bit ops not implemented for uid comparisons */
+			if (f->op == Audit_bitmask || f->op == Audit_bittest)
+				goto exit_free;
+
+			f->uid = make_kuid(current_user_ns(), f->val);
+			if (!uid_valid(f->uid))
+				goto exit_free;
+			break;
 		case AUDIT_GID:
 		case AUDIT_EGID:
 		case AUDIT_SGID:
 		case AUDIT_FSGID:
-		case AUDIT_LOGINUID:
+			/* bit ops not implemented for gid comparisons */
+			if (f->op == Audit_bitmask || f->op == Audit_bittest)
+				goto exit_free;
+
+			f->gid = make_kgid(current_user_ns(), f->val);
+			if (!gid_valid(f->gid))
+				goto exit_free;
+			break;
+		case AUDIT_PID:
 		case AUDIT_PERS:
 		case AUDIT_MSGTYPE:
 		case AUDIT_PPID:
@@ -437,19 +455,39 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 
 		f->type = data->fields[i];
 		f->val = data->values[i];
+		f->uid = INVALID_UID;
+		f->gid = INVALID_GID;
 		f->lsm_str = NULL;
 		f->lsm_rule = NULL;
 		switch(f->type) {
-		case AUDIT_PID:
 		case AUDIT_UID:
 		case AUDIT_EUID:
 		case AUDIT_SUID:
 		case AUDIT_FSUID:
+		case AUDIT_LOGINUID:
+		case AUDIT_OBJ_UID:
+			/* bit ops not implemented for uid comparisons */
+			if (f->op == Audit_bitmask || f->op == Audit_bittest)
+				goto exit_free;
+
+			f->uid = make_kuid(current_user_ns(), f->val);
+			if (!uid_valid(f->uid))
+				goto exit_free;
+			break;
 		case AUDIT_GID:
 		case AUDIT_EGID:
 		case AUDIT_SGID:
 		case AUDIT_FSGID:
-		case AUDIT_LOGINUID:
+		case AUDIT_OBJ_GID:
+			/* bit ops not implemented for gid comparisons */
+			if (f->op == Audit_bitmask || f->op == Audit_bittest)
+				goto exit_free;
+
+			f->gid = make_kgid(current_user_ns(), f->val);
+			if (!gid_valid(f->gid))
+				goto exit_free;
+			break;
+		case AUDIT_PID:
 		case AUDIT_PERS:
 		case AUDIT_MSGTYPE:
 		case AUDIT_PPID:
@@ -461,8 +499,6 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 		case AUDIT_ARG1:
 		case AUDIT_ARG2:
 		case AUDIT_ARG3:
-		case AUDIT_OBJ_UID:
-		case AUDIT_OBJ_GID:
 			break;
 		case AUDIT_ARCH:
 			entry->rule.arch_f = f;
@@ -707,6 +743,23 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
 			if (strcmp(a->filterkey, b->filterkey))
 				return 1;
 			break;
+		case AUDIT_UID:
+		case AUDIT_EUID:
+		case AUDIT_SUID:
+		case AUDIT_FSUID:
+		case AUDIT_LOGINUID:
+		case AUDIT_OBJ_UID:
+			if (!uid_eq(a->fields[i].uid, b->fields[i].uid))
+				return 1;
+			break;
+		case AUDIT_GID:
+		case AUDIT_EGID:
+		case AUDIT_SGID:
+		case AUDIT_FSGID:
+		case AUDIT_OBJ_GID:
+			if (!gid_eq(a->fields[i].gid, b->fields[i].gid))
+				return 1;
+			break;
 		default:
 			if (a->fields[i].val != b->fields[i].val)
 				return 1;
@@ -1056,7 +1109,7 @@ static void audit_list_rules(int pid, int seq, struct sk_buff_head *q)
 }
 
 /* Log rule additions and removals */
-static void audit_log_rule_change(uid_t loginuid, u32 sessionid, u32 sid,
+static void audit_log_rule_change(kuid_t loginuid, u32 sessionid, u32 sid,
 				  char *action, struct audit_krule *rule,
 				  int res)
 {
@@ -1068,7 +1121,8 @@ static void audit_log_rule_change(uid_t loginuid, u32 sessionid, u32 sid,
 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
 	if (!ab)
 		return;
-	audit_log_format(ab, "auid=%u ses=%u", loginuid, sessionid);
+	audit_log_format(ab, "auid=%u ses=%u",
+			 from_kuid(&init_user_ns, loginuid), sessionid);
 	if (sid) {
 		char *ctx = NULL;
 		u32 len;
@@ -1098,8 +1152,8 @@ static void audit_log_rule_change(uid_t loginuid, u32 sessionid, u32 sid,
 * @sessionid: sessionid for netlink audit message
 * @sid: SE Linux Security ID of sender
 */
-int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
-			 size_t datasz, uid_t loginuid, u32 sessionid, u32 sid)
+int audit_receive_filter(int type, int pid, int seq, void *data,
+			 size_t datasz, kuid_t loginuid, u32 sessionid, u32 sid)
 {
 	struct task_struct *tsk;
 	struct audit_netlink_list *dest;
@@ -1198,46 +1252,110 @@ int audit_comparator(u32 left, u32 op, u32 right)
 	}
 }
 
-/* Compare given dentry name with last component in given path,
- * return of 0 indicates a match. */
-int audit_compare_dname_path(const char *dname, const char *path,
-			     int *dirlen)
+int audit_uid_comparator(kuid_t left, u32 op, kuid_t right)
 {
-	int dlen, plen;
-	const char *p;
+	switch (op) {
+	case Audit_equal:
+		return uid_eq(left, right);
+	case Audit_not_equal:
+		return !uid_eq(left, right);
+	case Audit_lt:
+		return uid_lt(left, right);
+	case Audit_le:
+		return uid_lte(left, right);
+	case Audit_gt:
+		return uid_gt(left, right);
+	case Audit_ge:
+		return uid_gte(left, right);
+	case Audit_bitmask:
+	case Audit_bittest:
+	default:
+		BUG();
+		return 0;
+	}
+}
 
-	if (!dname || !path)
-		return 1;
+int audit_gid_comparator(kgid_t left, u32 op, kgid_t right)
+{
+	switch (op) {
+	case Audit_equal:
+		return gid_eq(left, right);
+	case Audit_not_equal:
+		return !gid_eq(left, right);
+	case Audit_lt:
+		return gid_lt(left, right);
+	case Audit_le:
+		return gid_lte(left, right);
+	case Audit_gt:
+		return gid_gt(left, right);
+	case Audit_ge:
+		return gid_gte(left, right);
+	case Audit_bitmask:
+	case Audit_bittest:
+	default:
+		BUG();
+		return 0;
+	}
+}
+
+/**
+ * parent_len - find the length of the parent portion of a pathname
+ * @path: pathname of which to determine length
+ */
+int parent_len(const char *path)
+{
+	int plen;
+	const char *p;
 
-	dlen = strlen(dname);
 	plen = strlen(path);
-	if (plen < dlen)
-		return 1;
+
+	if (plen == 0)
+		return plen;
 
 	/* disregard trailing slashes */
 	p = path + plen - 1;
 	while ((*p == '/') && (p > path))
 		p--;
 
-	/* find last path component */
-	p = p - dlen + 1;
-	if (p < path)
+	/* walk backward until we find the next slash or hit beginning */
+	while ((*p != '/') && (p > path))
+		p--;
+
+	/* did we find a slash? Then increment to include it in path */
+	if (*p == '/')
+		p++;
+
+	return p - path;
+}
+
+/**
+ * audit_compare_dname_path - compare given dentry name with last component in
+ *			      given path. Return of 0 indicates a match.
+ * @dname:	dentry name that we're comparing
+ * @path:	full pathname that we're comparing
+ * @parentlen:	length of the parent if known. Passing in AUDIT_NAME_FULL
+ *		here indicates that we must compute this value.
+ */
+int audit_compare_dname_path(const char *dname, const char *path, int parentlen)
+{
+	int dlen, pathlen;
+	const char *p;
+
+	dlen = strlen(dname);
+	pathlen = strlen(path);
+	if (pathlen < dlen)
 		return 1;
-	else if (p > path) {
-		if (*--p != '/')
-			return 1;
-		else
-			p++;
-	}
 
-	/* return length of path's directory component */
-	if (dirlen)
-		*dirlen = p - path;
+	parentlen = parentlen == AUDIT_NAME_FULL ? parent_len(path) : parentlen;
+	if (pathlen - parentlen != dlen)
+		return 1;
+
+	p = path + parentlen;
+
 	return strncmp(p, dname, dlen);
 }
 
-static int audit_filter_user_rules(struct netlink_skb_parms *cb,
-				   struct audit_krule *rule,
+static int audit_filter_user_rules(struct audit_krule *rule,
 				   enum audit_state *state)
 {
 	int i;
@@ -1249,17 +1367,17 @@ static int audit_filter_user_rules(struct netlink_skb_parms *cb,
 
 		switch (f->type) {
 		case AUDIT_PID:
-			result = audit_comparator(cb->creds.pid, f->op, f->val);
+			result = audit_comparator(task_pid_vnr(current), f->op, f->val);
 			break;
 		case AUDIT_UID:
-			result = audit_comparator(cb->creds.uid, f->op, f->val);
+			result = audit_uid_comparator(current_uid(), f->op, f->uid);
 			break;
 		case AUDIT_GID:
-			result = audit_comparator(cb->creds.gid, f->op, f->val);
+			result = audit_gid_comparator(current_gid(), f->op, f->gid);
 			break;
 		case AUDIT_LOGINUID:
-			result = audit_comparator(audit_get_loginuid(current),
-						  f->op, f->val);
+			result = audit_uid_comparator(audit_get_loginuid(current),
						      f->op, f->uid);
 			break;
 		case AUDIT_SUBJ_USER:
 		case AUDIT_SUBJ_ROLE:
@@ -1287,7 +1405,7 @@ static int audit_filter_user_rules(struct netlink_skb_parms *cb,
 	return 1;
 }
 
-int audit_filter_user(struct netlink_skb_parms *cb)
+int audit_filter_user(void)
 {
 	enum audit_state state = AUDIT_DISABLED;
 	struct audit_entry *e;
@@ -1295,7 +1413,7 @@ int audit_filter_user(struct netlink_skb_parms *cb)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_USER], list) {
-		if (audit_filter_user_rules(cb, &e->rule, &state)) {
+		if (audit_filter_user_rules(&e->rule, &state)) {
 			if (state == AUDIT_DISABLED)
 				ret = 0;
 			break;
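The reworked helpers split the old audit_compare_dname_path() into parent_len(), which returns the length of the directory portion including its trailing slash, and a comparison that only looks at the final component. A small userspace harness that mirrors the new logic (compare_dname_path is a local stand-in name for audit_compare_dname_path) makes the semantics easy to check:

#include <stdio.h>
#include <string.h>

#define AUDIT_NAME_FULL -1

/* Userspace copy of the reworked helpers, for illustration only. */
static int parent_len(const char *path)
{
	int plen = strlen(path);
	const char *p;

	if (plen == 0)
		return plen;
	p = path + plen - 1;
	while ((*p == '/') && (p > path))	/* disregard trailing slashes */
		p--;
	while ((*p != '/') && (p > path))	/* back up to the previous slash */
		p--;
	if (*p == '/')				/* include the slash itself */
		p++;
	return p - path;
}

static int compare_dname_path(const char *dname, const char *path, int parentlen)
{
	int dlen = strlen(dname), pathlen = strlen(path);

	if (pathlen < dlen)
		return 1;
	parentlen = parentlen == AUDIT_NAME_FULL ? parent_len(path) : parentlen;
	if (pathlen - parentlen != dlen)
		return 1;
	return strncmp(path + parentlen, dname, dlen);
}

int main(void)
{
	printf("%d\n", parent_len("/etc/passwd"));			/* 5 -> "/etc/" */
	printf("%d\n", compare_dname_path("passwd", "/etc/passwd",
					  AUDIT_NAME_FULL));		/* 0: match */
	printf("%d\n", compare_dname_path("shadow", "/etc/passwd", 5));	/* non-zero */
	return 0;
}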
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 4b96415527b8..2f186ed80c40 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -81,9 +81,6 @@
 * a name dynamically and also add those to the list anchored by names_list. */
 #define AUDIT_NAMES 5
 
-/* Indicates that audit should log the full pathname. */
-#define AUDIT_NAME_FULL -1
-
 /* no execve audit message should be longer than this (userspace limits) */
 #define MAX_EXECVE_AUDIT_LEN 7500
 
@@ -106,27 +103,29 @@ struct audit_cap_data {
 * we don't let putname() free it (instead we free all of the saved
 * pointers at syscall exit time).
 *
- * Further, in fs/namei.c:path_lookup() we store the inode and device. */
+ * Further, in fs/namei.c:path_lookup() we store the inode and device.
+ */
 struct audit_names {
 	struct list_head list;	/* audit_context->names_list */
-	const char	*name;
+	struct filename	*name;
 	unsigned long	ino;
 	dev_t		dev;
 	umode_t		mode;
-	uid_t		uid;
-	gid_t		gid;
+	kuid_t		uid;
+	kgid_t		gid;
 	dev_t		rdev;
 	u32		osid;
 	struct audit_cap_data	fcap;
 	unsigned int	fcap_ver;
 	int		name_len;	/* number of name's characters to log */
-	bool		name_put;	/* call __putname() for this name */
+	unsigned char	type;		/* record type */
+	bool		name_put;	/* call __putname() for this name */
 	/*
 	 * This was an allocated audit_names and not from the array of
 	 * names allocated in the task audit context. Thus this name
 	 * should be freed on syscall exit
 	 */
 	bool		should_free;
 };
 
 struct audit_aux_data {
@@ -149,8 +148,8 @@ struct audit_aux_data_execve {
 struct audit_aux_data_pids {
 	struct audit_aux_data	d;
 	pid_t			target_pid[AUDIT_AUX_PIDS];
-	uid_t			target_auid[AUDIT_AUX_PIDS];
-	uid_t			target_uid[AUDIT_AUX_PIDS];
+	kuid_t			target_auid[AUDIT_AUX_PIDS];
+	kuid_t			target_uid[AUDIT_AUX_PIDS];
 	unsigned int		target_sessionid[AUDIT_AUX_PIDS];
 	u32			target_sid[AUDIT_AUX_PIDS];
 	char			target_comm[AUDIT_AUX_PIDS][TASK_COMM_LEN];
@@ -208,14 +207,14 @@ struct audit_context {
 	size_t sockaddr_len;
 	/* Save things to print about task_struct */
 	pid_t		pid, ppid;
-	uid_t		uid, euid, suid, fsuid;
-	gid_t		gid, egid, sgid, fsgid;
+	kuid_t		uid, euid, suid, fsuid;
+	kgid_t		gid, egid, sgid, fsgid;
 	unsigned long	personality;
 	int		arch;
 
 	pid_t		target_pid;
-	uid_t		target_auid;
-	uid_t		target_uid;
+	kuid_t		target_auid;
+	kuid_t		target_uid;
 	unsigned int	target_sessionid;
 	u32		target_sid;
 	char		target_comm[TASK_COMM_LEN];
@@ -231,8 +230,8 @@ struct audit_context {
 			long args[6];
 		} socketcall;
 		struct {
-			uid_t uid;
-			gid_t gid;
+			kuid_t uid;
+			kgid_t gid;
 			umode_t mode;
 			u32 osid;
 			int has_perm;
@@ -464,37 +463,47 @@ static int match_tree_refs(struct audit_context *ctx, struct audit_tree *tree)
 	return 0;
 }
 
-static int audit_compare_id(uid_t uid1,
+static int audit_compare_uid(kuid_t uid,
 			    struct audit_names *name,
-			    unsigned long name_offset,
-			    struct audit_field *f,
-			    struct audit_context *ctx)
+			     struct audit_field *f,
+			     struct audit_context *ctx)
 {
 	struct audit_names *n;
-	unsigned long addr;
-	uid_t uid2;
 	int rc;
 
-	BUILD_BUG_ON(sizeof(uid_t) != sizeof(gid_t));
-
 	if (name) {
-		addr = (unsigned long)name;
-		addr += name_offset;
-
-		uid2 = *(uid_t *)addr;
-		rc = audit_comparator(uid1, f->op, uid2);
+		rc = audit_uid_comparator(uid, f->op, name->uid);
 		if (rc)
 			return rc;
 	}
 
 	if (ctx) {
 		list_for_each_entry(n, &ctx->names_list, list) {
-			addr = (unsigned long)n;
-			addr += name_offset;
-
-			uid2 = *(uid_t *)addr;
+			rc = audit_uid_comparator(uid, f->op, n->uid);
+			if (rc)
+				return rc;
+		}
+	}
+	return 0;
+}
 
-			rc = audit_comparator(uid1, f->op, uid2);
+static int audit_compare_gid(kgid_t gid,
+			     struct audit_names *name,
+			     struct audit_field *f,
+			     struct audit_context *ctx)
+{
+	struct audit_names *n;
+	int rc;
+
+	if (name) {
+		rc = audit_gid_comparator(gid, f->op, name->gid);
+		if (rc)
+			return rc;
+	}
+
+	if (ctx) {
+		list_for_each_entry(n, &ctx->names_list, list) {
+			rc = audit_gid_comparator(gid, f->op, n->gid);
 			if (rc)
 				return rc;
 		}
@@ -511,80 +520,62 @@ static int audit_field_compare(struct task_struct *tsk,
 	switch (f->val) {
 	/* process to file object comparisons */
 	case AUDIT_COMPARE_UID_TO_OBJ_UID:
-		return audit_compare_id(cred->uid,
-					name, offsetof(struct audit_names, uid),
-					f, ctx);
+		return audit_compare_uid(cred->uid, name, f, ctx);
 	case AUDIT_COMPARE_GID_TO_OBJ_GID:
-		return audit_compare_id(cred->gid,
-					name, offsetof(struct audit_names, gid),
-					f, ctx);
+		return audit_compare_gid(cred->gid, name, f, ctx);
 	case AUDIT_COMPARE_EUID_TO_OBJ_UID:
-		return audit_compare_id(cred->euid,
-					name, offsetof(struct audit_names, uid),
-					f, ctx);
+		return audit_compare_uid(cred->euid, name, f, ctx);
 	case AUDIT_COMPARE_EGID_TO_OBJ_GID:
-		return audit_compare_id(cred->egid,
-					name, offsetof(struct audit_names, gid),
-					f, ctx);
+		return audit_compare_gid(cred->egid, name, f, ctx);
 	case AUDIT_COMPARE_AUID_TO_OBJ_UID:
-		return audit_compare_id(tsk->loginuid,
-					name, offsetof(struct audit_names, uid),
-					f, ctx);
+		return audit_compare_uid(tsk->loginuid, name, f, ctx);
 	case AUDIT_COMPARE_SUID_TO_OBJ_UID:
-		return audit_compare_id(cred->suid,
-					name, offsetof(struct audit_names, uid),
-					f, ctx);
+		return audit_compare_uid(cred->suid, name, f, ctx);
 	case AUDIT_COMPARE_SGID_TO_OBJ_GID:
-		return audit_compare_id(cred->sgid,
-					name, offsetof(struct audit_names, gid),
-					f, ctx);
+		return audit_compare_gid(cred->sgid, name, f, ctx);
 	case AUDIT_COMPARE_FSUID_TO_OBJ_UID:
-		return audit_compare_id(cred->fsuid,
-					name, offsetof(struct audit_names, uid),
-					f, ctx);
+		return audit_compare_uid(cred->fsuid, name, f, ctx);
 	case AUDIT_COMPARE_FSGID_TO_OBJ_GID:
-		return audit_compare_id(cred->fsgid,
-					name, offsetof(struct audit_names, gid),
-					f, ctx);
+		return audit_compare_gid(cred->fsgid, name, f, ctx);
 	/* uid comparisons */
 	case AUDIT_COMPARE_UID_TO_AUID:
-		return audit_comparator(cred->uid, f->op, tsk->loginuid);
+		return audit_uid_comparator(cred->uid, f->op, tsk->loginuid);
 	case AUDIT_COMPARE_UID_TO_EUID:
-		return audit_comparator(cred->uid, f->op, cred->euid);
+		return audit_uid_comparator(cred->uid, f->op, cred->euid);
 	case AUDIT_COMPARE_UID_TO_SUID:
-		return audit_comparator(cred->uid, f->op, cred->suid);
+		return audit_uid_comparator(cred->uid, f->op, cred->suid);
 	case AUDIT_COMPARE_UID_TO_FSUID:
-		return audit_comparator(cred->uid, f->op, cred->fsuid);
+		return audit_uid_comparator(cred->uid, f->op, cred->fsuid);
 	/* auid comparisons */
 	case AUDIT_COMPARE_AUID_TO_EUID:
-		return audit_comparator(tsk->loginuid, f->op, cred->euid);
+		return audit_uid_comparator(tsk->loginuid, f->op, cred->euid);
 	case AUDIT_COMPARE_AUID_TO_SUID:
-		return audit_comparator(tsk->loginuid, f->op, cred->suid);
+		return audit_uid_comparator(tsk->loginuid, f->op, cred->suid);
 	case AUDIT_COMPARE_AUID_TO_FSUID:
-		return audit_comparator(tsk->loginuid, f->op, cred->fsuid);
+		return audit_uid_comparator(tsk->loginuid, f->op, cred->fsuid);
 	/* euid comparisons */
 	case AUDIT_COMPARE_EUID_TO_SUID:
-		return audit_comparator(cred->euid, f->op, cred->suid);
+		return audit_uid_comparator(cred->euid, f->op, cred->suid);
 	case AUDIT_COMPARE_EUID_TO_FSUID:
-		return audit_comparator(cred->euid, f->op, cred->fsuid);
+		return audit_uid_comparator(cred->euid, f->op, cred->fsuid);
 	/* suid comparisons */
 	case AUDIT_COMPARE_SUID_TO_FSUID:
-		return audit_comparator(cred->suid, f->op, cred->fsuid);
+		return audit_uid_comparator(cred->suid, f->op, cred->fsuid);
 	/* gid comparisons */
| 574 | case AUDIT_COMPARE_GID_TO_EGID: | 565 | case AUDIT_COMPARE_GID_TO_EGID: |
| 575 | return audit_comparator(cred->gid, f->op, cred->egid); | 566 | return audit_gid_comparator(cred->gid, f->op, cred->egid); |
| 576 | case AUDIT_COMPARE_GID_TO_SGID: | 567 | case AUDIT_COMPARE_GID_TO_SGID: |
| 577 | return audit_comparator(cred->gid, f->op, cred->sgid); | 568 | return audit_gid_comparator(cred->gid, f->op, cred->sgid); |
| 578 | case AUDIT_COMPARE_GID_TO_FSGID: | 569 | case AUDIT_COMPARE_GID_TO_FSGID: |
| 579 | return audit_comparator(cred->gid, f->op, cred->fsgid); | 570 | return audit_gid_comparator(cred->gid, f->op, cred->fsgid); |
| 580 | /* egid comparisons */ | 571 | /* egid comparisons */ |
| 581 | case AUDIT_COMPARE_EGID_TO_SGID: | 572 | case AUDIT_COMPARE_EGID_TO_SGID: |
| 582 | return audit_comparator(cred->egid, f->op, cred->sgid); | 573 | return audit_gid_comparator(cred->egid, f->op, cred->sgid); |
| 583 | case AUDIT_COMPARE_EGID_TO_FSGID: | 574 | case AUDIT_COMPARE_EGID_TO_FSGID: |
| 584 | return audit_comparator(cred->egid, f->op, cred->fsgid); | 575 | return audit_gid_comparator(cred->egid, f->op, cred->fsgid); |
| 585 | /* sgid comparison */ | 576 | /* sgid comparison */ |
| 586 | case AUDIT_COMPARE_SGID_TO_FSGID: | 577 | case AUDIT_COMPARE_SGID_TO_FSGID: |
| 587 | return audit_comparator(cred->sgid, f->op, cred->fsgid); | 578 | return audit_gid_comparator(cred->sgid, f->op, cred->fsgid); |
| 588 | default: | 579 | default: |
| 589 | WARN(1, "Missing AUDIT_COMPARE define. Report as a bug\n"); | 580 | WARN(1, "Missing AUDIT_COMPARE define. Report as a bug\n"); |
| 590 | return 0; | 581 | return 0; |
| @@ -630,28 +621,28 @@ static int audit_filter_rules(struct task_struct *tsk, | |||
| 630 | } | 621 | } |
| 631 | break; | 622 | break; |
| 632 | case AUDIT_UID: | 623 | case AUDIT_UID: |
| 633 | result = audit_comparator(cred->uid, f->op, f->val); | 624 | result = audit_uid_comparator(cred->uid, f->op, f->uid); |
| 634 | break; | 625 | break; |
| 635 | case AUDIT_EUID: | 626 | case AUDIT_EUID: |
| 636 | result = audit_comparator(cred->euid, f->op, f->val); | 627 | result = audit_uid_comparator(cred->euid, f->op, f->uid); |
| 637 | break; | 628 | break; |
| 638 | case AUDIT_SUID: | 629 | case AUDIT_SUID: |
| 639 | result = audit_comparator(cred->suid, f->op, f->val); | 630 | result = audit_uid_comparator(cred->suid, f->op, f->uid); |
| 640 | break; | 631 | break; |
| 641 | case AUDIT_FSUID: | 632 | case AUDIT_FSUID: |
| 642 | result = audit_comparator(cred->fsuid, f->op, f->val); | 633 | result = audit_uid_comparator(cred->fsuid, f->op, f->uid); |
| 643 | break; | 634 | break; |
| 644 | case AUDIT_GID: | 635 | case AUDIT_GID: |
| 645 | result = audit_comparator(cred->gid, f->op, f->val); | 636 | result = audit_gid_comparator(cred->gid, f->op, f->gid); |
| 646 | break; | 637 | break; |
| 647 | case AUDIT_EGID: | 638 | case AUDIT_EGID: |
| 648 | result = audit_comparator(cred->egid, f->op, f->val); | 639 | result = audit_gid_comparator(cred->egid, f->op, f->gid); |
| 649 | break; | 640 | break; |
| 650 | case AUDIT_SGID: | 641 | case AUDIT_SGID: |
| 651 | result = audit_comparator(cred->sgid, f->op, f->val); | 642 | result = audit_gid_comparator(cred->sgid, f->op, f->gid); |
| 652 | break; | 643 | break; |
| 653 | case AUDIT_FSGID: | 644 | case AUDIT_FSGID: |
| 654 | result = audit_comparator(cred->fsgid, f->op, f->val); | 645 | result = audit_gid_comparator(cred->fsgid, f->op, f->gid); |
| 655 | break; | 646 | break; |
| 656 | case AUDIT_PERS: | 647 | case AUDIT_PERS: |
| 657 | result = audit_comparator(tsk->personality, f->op, f->val); | 648 | result = audit_comparator(tsk->personality, f->op, f->val); |
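For the f->uid and f->gid fields used above to be comparable against kuid_t credentials, the raw uid_t supplied with the rule has to be converted once, when the rule is installed, rather than on every filter hit. That conversion lives in the rule-parsing code in kernel/auditfilter.c, outside this diff; a hedged sketch of the idea, assuming the rule is interpreted in the installer's user namespace and unmappable ids are rejected (the helper name below is hypothetical):

    /* Illustrative only: the real conversion happens while the audit_field
     * is built in kernel/auditfilter.c. */
    static int audit_field_set_uid(struct audit_field *f, u32 val)
    {
            f->uid = make_kuid(current_user_ns(), val);
            if (!uid_valid(f->uid))
                    return -EINVAL;         /* uid does not map in this namespace */
            return 0;
    }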
| @@ -717,10 +708,10 @@ static int audit_filter_rules(struct task_struct *tsk, | |||
| 717 | break; | 708 | break; |
| 718 | case AUDIT_OBJ_UID: | 709 | case AUDIT_OBJ_UID: |
| 719 | if (name) { | 710 | if (name) { |
| 720 | result = audit_comparator(name->uid, f->op, f->val); | 711 | result = audit_uid_comparator(name->uid, f->op, f->uid); |
| 721 | } else if (ctx) { | 712 | } else if (ctx) { |
| 722 | list_for_each_entry(n, &ctx->names_list, list) { | 713 | list_for_each_entry(n, &ctx->names_list, list) { |
| 723 | if (audit_comparator(n->uid, f->op, f->val)) { | 714 | if (audit_uid_comparator(n->uid, f->op, f->uid)) { |
| 724 | ++result; | 715 | ++result; |
| 725 | break; | 716 | break; |
| 726 | } | 717 | } |
| @@ -729,10 +720,10 @@ static int audit_filter_rules(struct task_struct *tsk, | |||
| 729 | break; | 720 | break; |
| 730 | case AUDIT_OBJ_GID: | 721 | case AUDIT_OBJ_GID: |
| 731 | if (name) { | 722 | if (name) { |
| 732 | result = audit_comparator(name->gid, f->op, f->val); | 723 | result = audit_gid_comparator(name->gid, f->op, f->gid); |
| 733 | } else if (ctx) { | 724 | } else if (ctx) { |
| 734 | list_for_each_entry(n, &ctx->names_list, list) { | 725 | list_for_each_entry(n, &ctx->names_list, list) { |
| 735 | if (audit_comparator(n->gid, f->op, f->val)) { | 726 | if (audit_gid_comparator(n->gid, f->op, f->gid)) { |
| 736 | ++result; | 727 | ++result; |
| 737 | break; | 728 | break; |
| 738 | } | 729 | } |
| @@ -750,7 +741,7 @@ static int audit_filter_rules(struct task_struct *tsk, | |||
| 750 | case AUDIT_LOGINUID: | 741 | case AUDIT_LOGINUID: |
| 751 | result = 0; | 742 | result = 0; |
| 752 | if (ctx) | 743 | if (ctx) |
| 753 | result = audit_comparator(tsk->loginuid, f->op, f->val); | 744 | result = audit_uid_comparator(tsk->loginuid, f->op, f->uid); |
| 754 | break; | 745 | break; |
| 755 | case AUDIT_SUBJ_USER: | 746 | case AUDIT_SUBJ_USER: |
| 756 | case AUDIT_SUBJ_ROLE: | 747 | case AUDIT_SUBJ_ROLE: |
| @@ -1006,7 +997,7 @@ static inline void audit_free_names(struct audit_context *context) | |||
| 1006 | context->ino_count); | 997 | context->ino_count); |
| 1007 | list_for_each_entry(n, &context->names_list, list) { | 998 | list_for_each_entry(n, &context->names_list, list) { |
| 1008 | printk(KERN_ERR "names[%d] = %p = %s\n", i, | 999 | printk(KERN_ERR "names[%d] = %p = %s\n", i, |
| 1009 | n->name, n->name ?: "(null)"); | 1000 | n->name, n->name->name ?: "(null)"); |
| 1010 | } | 1001 | } |
| 1011 | dump_stack(); | 1002 | dump_stack(); |
| 1012 | return; | 1003 | return; |
| @@ -1154,13 +1145,43 @@ error_path: | |||
| 1154 | 1145 | ||
| 1155 | EXPORT_SYMBOL(audit_log_task_context); | 1146 | EXPORT_SYMBOL(audit_log_task_context); |
| 1156 | 1147 | ||
| 1157 | static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) | 1148 | void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) |
| 1158 | { | 1149 | { |
| 1150 | const struct cred *cred; | ||
| 1159 | char name[sizeof(tsk->comm)]; | 1151 | char name[sizeof(tsk->comm)]; |
| 1160 | struct mm_struct *mm = tsk->mm; | 1152 | struct mm_struct *mm = tsk->mm; |
| 1161 | struct vm_area_struct *vma; | 1153 | char *tty; |
| 1154 | |||
| 1155 | if (!ab) | ||
| 1156 | return; | ||
| 1162 | 1157 | ||
| 1163 | /* tsk == current */ | 1158 | /* tsk == current */ |
| 1159 | cred = current_cred(); | ||
| 1160 | |||
| 1161 | spin_lock_irq(&tsk->sighand->siglock); | ||
| 1162 | if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name) | ||
| 1163 | tty = tsk->signal->tty->name; | ||
| 1164 | else | ||
| 1165 | tty = "(none)"; | ||
| 1166 | spin_unlock_irq(&tsk->sighand->siglock); | ||
| 1167 | |||
| 1168 | |||
| 1169 | audit_log_format(ab, | ||
| 1170 | " ppid=%ld pid=%d auid=%u uid=%u gid=%u" | ||
| 1171 | " euid=%u suid=%u fsuid=%u" | ||
| 1172 | " egid=%u sgid=%u fsgid=%u ses=%u tty=%s", | ||
| 1173 | sys_getppid(), | ||
| 1174 | tsk->pid, | ||
| 1175 | from_kuid(&init_user_ns, tsk->loginuid), | ||
| 1176 | from_kuid(&init_user_ns, cred->uid), | ||
| 1177 | from_kgid(&init_user_ns, cred->gid), | ||
| 1178 | from_kuid(&init_user_ns, cred->euid), | ||
| 1179 | from_kuid(&init_user_ns, cred->suid), | ||
| 1180 | from_kuid(&init_user_ns, cred->fsuid), | ||
| 1181 | from_kgid(&init_user_ns, cred->egid), | ||
| 1182 | from_kgid(&init_user_ns, cred->sgid), | ||
| 1183 | from_kgid(&init_user_ns, cred->fsgid), | ||
| 1184 | tsk->sessionid, tty); | ||
| 1164 | 1185 | ||
| 1165 | get_task_comm(name, tsk); | 1186 | get_task_comm(name, tsk); |
| 1166 | audit_log_format(ab, " comm="); | 1187 | audit_log_format(ab, " comm="); |
| @@ -1168,23 +1189,17 @@ static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk | |||
| 1168 | 1189 | ||
| 1169 | if (mm) { | 1190 | if (mm) { |
| 1170 | down_read(&mm->mmap_sem); | 1191 | down_read(&mm->mmap_sem); |
| 1171 | vma = mm->mmap; | 1192 | if (mm->exe_file) |
| 1172 | while (vma) { | 1193 | audit_log_d_path(ab, " exe=", &mm->exe_file->f_path); |
| 1173 | if ((vma->vm_flags & VM_EXECUTABLE) && | ||
| 1174 | vma->vm_file) { | ||
| 1175 | audit_log_d_path(ab, " exe=", | ||
| 1176 | &vma->vm_file->f_path); | ||
| 1177 | break; | ||
| 1178 | } | ||
| 1179 | vma = vma->vm_next; | ||
| 1180 | } | ||
| 1181 | up_read(&mm->mmap_sem); | 1194 | up_read(&mm->mmap_sem); |
| 1182 | } | 1195 | } |
| 1183 | audit_log_task_context(ab); | 1196 | audit_log_task_context(ab); |
| 1184 | } | 1197 | } |
| 1185 | 1198 | ||
| 1199 | EXPORT_SYMBOL(audit_log_task_info); | ||
| 1200 | |||
| 1186 | static int audit_log_pid_context(struct audit_context *context, pid_t pid, | 1201 | static int audit_log_pid_context(struct audit_context *context, pid_t pid, |
| 1187 | uid_t auid, uid_t uid, unsigned int sessionid, | 1202 | kuid_t auid, kuid_t uid, unsigned int sessionid, |
| 1188 | u32 sid, char *comm) | 1203 | u32 sid, char *comm) |
| 1189 | { | 1204 | { |
| 1190 | struct audit_buffer *ab; | 1205 | struct audit_buffer *ab; |
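audit_log_task_info() is no longer static: it now collects the credentials, tty and exe path itself, translating ids through init_user_ns, and is exported so that any record emitter can append the same task fields. A minimal usage sketch, assuming the caller only needs its own event-specific fields before the task block (the surrounding function is hypothetical):

    /* Hypothetical caller of the newly exported helper. */
    static void example_log_event(long signr)
    {
            struct audit_buffer *ab;

            ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
            if (!ab)
                    return;
            audit_log_format(ab, "sig=%ld", signr);   /* event-specific fields first */
            audit_log_task_info(ab, current);         /* ppid/pid/auid/uid/.../tty/comm/exe */
            audit_log_end(ab);
    }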
| @@ -1196,8 +1211,9 @@ static int audit_log_pid_context(struct audit_context *context, pid_t pid, | |||
| 1196 | if (!ab) | 1211 | if (!ab) |
| 1197 | return rc; | 1212 | return rc; |
| 1198 | 1213 | ||
| 1199 | audit_log_format(ab, "opid=%d oauid=%d ouid=%d oses=%d", pid, auid, | 1214 | audit_log_format(ab, "opid=%d oauid=%d ouid=%d oses=%d", pid, |
| 1200 | uid, sessionid); | 1215 | from_kuid(&init_user_ns, auid), |
| 1216 | from_kuid(&init_user_ns, uid), sessionid); | ||
| 1201 | if (security_secid_to_secctx(sid, &ctx, &len)) { | 1217 | if (security_secid_to_secctx(sid, &ctx, &len)) { |
| 1202 | audit_log_format(ab, " obj=(none)"); | 1218 | audit_log_format(ab, " obj=(none)"); |
| 1203 | rc = 1; | 1219 | rc = 1; |
| @@ -1447,7 +1463,9 @@ static void show_special(struct audit_context *context, int *call_panic) | |||
| 1447 | u32 osid = context->ipc.osid; | 1463 | u32 osid = context->ipc.osid; |
| 1448 | 1464 | ||
| 1449 | audit_log_format(ab, "ouid=%u ogid=%u mode=%#ho", | 1465 | audit_log_format(ab, "ouid=%u ogid=%u mode=%#ho", |
| 1450 | context->ipc.uid, context->ipc.gid, context->ipc.mode); | 1466 | from_kuid(&init_user_ns, context->ipc.uid), |
| 1467 | from_kgid(&init_user_ns, context->ipc.gid), | ||
| 1468 | context->ipc.mode); | ||
| 1451 | if (osid) { | 1469 | if (osid) { |
| 1452 | char *ctx = NULL; | 1470 | char *ctx = NULL; |
| 1453 | u32 len; | 1471 | u32 len; |
| @@ -1536,7 +1554,7 @@ static void audit_log_name(struct audit_context *context, struct audit_names *n, | |||
| 1536 | case AUDIT_NAME_FULL: | 1554 | case AUDIT_NAME_FULL: |
| 1537 | /* log the full path */ | 1555 | /* log the full path */ |
| 1538 | audit_log_format(ab, " name="); | 1556 | audit_log_format(ab, " name="); |
| 1539 | audit_log_untrustedstring(ab, n->name); | 1557 | audit_log_untrustedstring(ab, n->name->name); |
| 1540 | break; | 1558 | break; |
| 1541 | case 0: | 1559 | case 0: |
| 1542 | /* name was specified as a relative path and the | 1560 | /* name was specified as a relative path and the |
| @@ -1546,7 +1564,7 @@ static void audit_log_name(struct audit_context *context, struct audit_names *n, | |||
| 1546 | default: | 1564 | default: |
| 1547 | /* log the name's directory component */ | 1565 | /* log the name's directory component */ |
| 1548 | audit_log_format(ab, " name="); | 1566 | audit_log_format(ab, " name="); |
| 1549 | audit_log_n_untrustedstring(ab, n->name, | 1567 | audit_log_n_untrustedstring(ab, n->name->name, |
| 1550 | n->name_len); | 1568 | n->name_len); |
| 1551 | } | 1569 | } |
| 1552 | } else | 1570 | } else |
| @@ -1560,8 +1578,8 @@ static void audit_log_name(struct audit_context *context, struct audit_names *n, | |||
| 1560 | MAJOR(n->dev), | 1578 | MAJOR(n->dev), |
| 1561 | MINOR(n->dev), | 1579 | MINOR(n->dev), |
| 1562 | n->mode, | 1580 | n->mode, |
| 1563 | n->uid, | 1581 | from_kuid(&init_user_ns, n->uid), |
| 1564 | n->gid, | 1582 | from_kgid(&init_user_ns, n->gid), |
| 1565 | MAJOR(n->rdev), | 1583 | MAJOR(n->rdev), |
| 1566 | MINOR(n->rdev)); | 1584 | MINOR(n->rdev)); |
| 1567 | } | 1585 | } |
| @@ -1585,26 +1603,12 @@ static void audit_log_name(struct audit_context *context, struct audit_names *n, | |||
| 1585 | 1603 | ||
| 1586 | static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) | 1604 | static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) |
| 1587 | { | 1605 | { |
| 1588 | const struct cred *cred; | ||
| 1589 | int i, call_panic = 0; | 1606 | int i, call_panic = 0; |
| 1590 | struct audit_buffer *ab; | 1607 | struct audit_buffer *ab; |
| 1591 | struct audit_aux_data *aux; | 1608 | struct audit_aux_data *aux; |
| 1592 | const char *tty; | ||
| 1593 | struct audit_names *n; | 1609 | struct audit_names *n; |
| 1594 | 1610 | ||
| 1595 | /* tsk == current */ | 1611 | /* tsk == current */ |
| 1596 | context->pid = tsk->pid; | ||
| 1597 | if (!context->ppid) | ||
| 1598 | context->ppid = sys_getppid(); | ||
| 1599 | cred = current_cred(); | ||
| 1600 | context->uid = cred->uid; | ||
| 1601 | context->gid = cred->gid; | ||
| 1602 | context->euid = cred->euid; | ||
| 1603 | context->suid = cred->suid; | ||
| 1604 | context->fsuid = cred->fsuid; | ||
| 1605 | context->egid = cred->egid; | ||
| 1606 | context->sgid = cred->sgid; | ||
| 1607 | context->fsgid = cred->fsgid; | ||
| 1608 | context->personality = tsk->personality; | 1612 | context->personality = tsk->personality; |
| 1609 | 1613 | ||
| 1610 | ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL); | 1614 | ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL); |
| @@ -1619,32 +1623,13 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts | |||
| 1619 | (context->return_valid==AUDITSC_SUCCESS)?"yes":"no", | 1623 | (context->return_valid==AUDITSC_SUCCESS)?"yes":"no", |
| 1620 | context->return_code); | 1624 | context->return_code); |
| 1621 | 1625 | ||
| 1622 | spin_lock_irq(&tsk->sighand->siglock); | ||
| 1623 | if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name) | ||
| 1624 | tty = tsk->signal->tty->name; | ||
| 1625 | else | ||
| 1626 | tty = "(none)"; | ||
| 1627 | spin_unlock_irq(&tsk->sighand->siglock); | ||
| 1628 | |||
| 1629 | audit_log_format(ab, | 1626 | audit_log_format(ab, |
| 1630 | " a0=%lx a1=%lx a2=%lx a3=%lx items=%d" | 1627 | " a0=%lx a1=%lx a2=%lx a3=%lx items=%d", |
| 1631 | " ppid=%d pid=%d auid=%u uid=%u gid=%u" | 1628 | context->argv[0], |
| 1632 | " euid=%u suid=%u fsuid=%u" | 1629 | context->argv[1], |
| 1633 | " egid=%u sgid=%u fsgid=%u tty=%s ses=%u", | 1630 | context->argv[2], |
| 1634 | context->argv[0], | 1631 | context->argv[3], |
| 1635 | context->argv[1], | 1632 | context->name_count); |
| 1636 | context->argv[2], | ||
| 1637 | context->argv[3], | ||
| 1638 | context->name_count, | ||
| 1639 | context->ppid, | ||
| 1640 | context->pid, | ||
| 1641 | tsk->loginuid, | ||
| 1642 | context->uid, | ||
| 1643 | context->gid, | ||
| 1644 | context->euid, context->suid, context->fsuid, | ||
| 1645 | context->egid, context->sgid, context->fsgid, tty, | ||
| 1646 | tsk->sessionid); | ||
| 1647 | |||
| 1648 | 1633 | ||
| 1649 | audit_log_task_info(ab, tsk); | 1634 | audit_log_task_info(ab, tsk); |
| 1650 | audit_log_key(ab, context->filterkey); | 1635 | audit_log_key(ab, context->filterkey); |
| @@ -2009,7 +1994,8 @@ retry: | |||
| 2009 | #endif | 1994 | #endif |
| 2010 | } | 1995 | } |
| 2011 | 1996 | ||
| 2012 | static struct audit_names *audit_alloc_name(struct audit_context *context) | 1997 | static struct audit_names *audit_alloc_name(struct audit_context *context, |
| 1998 | unsigned char type) | ||
| 2013 | { | 1999 | { |
| 2014 | struct audit_names *aname; | 2000 | struct audit_names *aname; |
| 2015 | 2001 | ||
| @@ -2024,6 +2010,7 @@ static struct audit_names *audit_alloc_name(struct audit_context *context) | |||
| 2024 | } | 2010 | } |
| 2025 | 2011 | ||
| 2026 | aname->ino = (unsigned long)-1; | 2012 | aname->ino = (unsigned long)-1; |
| 2013 | aname->type = type; | ||
| 2027 | list_add_tail(&aname->list, &context->names_list); | 2014 | list_add_tail(&aname->list, &context->names_list); |
| 2028 | 2015 | ||
| 2029 | context->name_count++; | 2016 | context->name_count++; |
| @@ -2034,13 +2021,36 @@ static struct audit_names *audit_alloc_name(struct audit_context *context) | |||
| 2034 | } | 2021 | } |
| 2035 | 2022 | ||
| 2036 | /** | 2023 | /** |
| 2024 | * audit_reusename - fill out filename with info from existing entry | ||
| 2025 | * @uptr: userland ptr to pathname | ||
| 2026 | * | ||
| 2027 | * Search the audit_names list for the current audit context. If there is an | ||
| 2028 | * existing entry with a matching "uptr" then return the filename | ||
| 2029 | * associated with that audit_name. If not, return NULL. | ||
| 2030 | */ | ||
| 2031 | struct filename * | ||
| 2032 | __audit_reusename(const __user char *uptr) | ||
| 2033 | { | ||
| 2034 | struct audit_context *context = current->audit_context; | ||
| 2035 | struct audit_names *n; | ||
| 2036 | |||
| 2037 | list_for_each_entry(n, &context->names_list, list) { | ||
| 2038 | if (!n->name) | ||
| 2039 | continue; | ||
| 2040 | if (n->name->uptr == uptr) | ||
| 2041 | return n->name; | ||
| 2042 | } | ||
| 2043 | return NULL; | ||
| 2044 | } | ||
| 2045 | |||
| 2046 | /** | ||
| 2037 | * audit_getname - add a name to the list | 2047 | * audit_getname - add a name to the list |
| 2038 | * @name: name to add | 2048 | * @name: name to add |
| 2039 | * | 2049 | * |
| 2040 | * Add a name to the list of audit names for this context. | 2050 | * Add a name to the list of audit names for this context. |
| 2041 | * Called from fs/namei.c:getname(). | 2051 | * Called from fs/namei.c:getname(). |
| 2042 | */ | 2052 | */ |
| 2043 | void __audit_getname(const char *name) | 2053 | void __audit_getname(struct filename *name) |
| 2044 | { | 2054 | { |
| 2045 | struct audit_context *context = current->audit_context; | 2055 | struct audit_context *context = current->audit_context; |
| 2046 | struct audit_names *n; | 2056 | struct audit_names *n; |
| @@ -2054,13 +2064,19 @@ void __audit_getname(const char *name) | |||
| 2054 | return; | 2064 | return; |
| 2055 | } | 2065 | } |
| 2056 | 2066 | ||
| 2057 | n = audit_alloc_name(context); | 2067 | #if AUDIT_DEBUG |
| 2068 | /* The filename _must_ have a populated ->name */ | ||
| 2069 | BUG_ON(!name->name); | ||
| 2070 | #endif | ||
| 2071 | |||
| 2072 | n = audit_alloc_name(context, AUDIT_TYPE_UNKNOWN); | ||
| 2058 | if (!n) | 2073 | if (!n) |
| 2059 | return; | 2074 | return; |
| 2060 | 2075 | ||
| 2061 | n->name = name; | 2076 | n->name = name; |
| 2062 | n->name_len = AUDIT_NAME_FULL; | 2077 | n->name_len = AUDIT_NAME_FULL; |
| 2063 | n->name_put = true; | 2078 | n->name_put = true; |
| 2079 | name->aname = n; | ||
| 2064 | 2080 | ||
| 2065 | if (!context->pwd.dentry) | 2081 | if (!context->pwd.dentry) |
| 2066 | get_fs_pwd(current->fs, &context->pwd); | 2082 | get_fs_pwd(current->fs, &context->pwd); |
| @@ -2073,7 +2089,7 @@ void __audit_getname(const char *name) | |||
| 2073 | * then we delay the putname until syscall exit. | 2089 | * then we delay the putname until syscall exit. |
| 2074 | * Called from include/linux/fs.h:putname(). | 2090 | * Called from include/linux/fs.h:putname(). |
| 2075 | */ | 2091 | */ |
| 2076 | void audit_putname(const char *name) | 2092 | void audit_putname(struct filename *name) |
| 2077 | { | 2093 | { |
| 2078 | struct audit_context *context = current->audit_context; | 2094 | struct audit_context *context = current->audit_context; |
| 2079 | 2095 | ||
| @@ -2088,7 +2104,7 @@ void audit_putname(const char *name) | |||
| 2088 | 2104 | ||
| 2089 | list_for_each_entry(n, &context->names_list, list) | 2105 | list_for_each_entry(n, &context->names_list, list) |
| 2090 | printk(KERN_ERR "name[%d] = %p = %s\n", i, | 2106 | printk(KERN_ERR "name[%d] = %p = %s\n", i, |
| 2091 | n->name, n->name ?: "(null)"); | 2107 | n->name, n->name->name ?: "(null)"); |
| 2092 | } | 2108 | } |
| 2093 | #endif | 2109 | #endif |
| 2094 | __putname(name); | 2110 | __putname(name); |
| @@ -2102,8 +2118,8 @@ void audit_putname(const char *name) | |||
| 2102 | " put_count=%d\n", | 2118 | " put_count=%d\n", |
| 2103 | __FILE__, __LINE__, | 2119 | __FILE__, __LINE__, |
| 2104 | context->serial, context->major, | 2120 | context->serial, context->major, |
| 2105 | context->in_syscall, name, context->name_count, | 2121 | context->in_syscall, name->name, |
| 2106 | context->put_count); | 2122 | context->name_count, context->put_count); |
| 2107 | dump_stack(); | 2123 | dump_stack(); |
| 2108 | } | 2124 | } |
| 2109 | } | 2125 | } |
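All of the n->name->name dereferences in this file follow from getname() now returning a struct filename instead of a bare string. For reference, the VFS-side structure looks roughly like the following (it is defined in include/linux/fs.h, not in this diff); the aname back-pointer is what __audit_getname() fills in above:

    /* Rough shape of the new VFS type; see include/linux/fs.h. */
    struct filename {
            const char              *name;     /* pointer to the pathname string */
            const __user char       *uptr;     /* original userland pointer, used for reuse matching */
            struct audit_names      *aname;    /* audit entry set up by __audit_getname() */
            bool                    separate;  /* was the string allocated separately from this struct? */
    };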
| @@ -2146,13 +2162,13 @@ static void audit_copy_inode(struct audit_names *name, const struct dentry *dent | |||
| 2146 | } | 2162 | } |
| 2147 | 2163 | ||
| 2148 | /** | 2164 | /** |
| 2149 | * audit_inode - store the inode and device from a lookup | 2165 | * __audit_inode - store the inode and device from a lookup |
| 2150 | * @name: name being audited | 2166 | * @name: name being audited |
| 2151 | * @dentry: dentry being audited | 2167 | * @dentry: dentry being audited |
| 2152 | * | 2168 | * @parent: does this dentry represent the parent? |
| 2153 | * Called from fs/namei.c:path_lookup(). | ||
| 2154 | */ | 2169 | */ |
| 2155 | void __audit_inode(const char *name, const struct dentry *dentry) | 2170 | void __audit_inode(struct filename *name, const struct dentry *dentry, |
| 2171 | unsigned int parent) | ||
| 2156 | { | 2172 | { |
| 2157 | struct audit_context *context = current->audit_context; | 2173 | struct audit_context *context = current->audit_context; |
| 2158 | const struct inode *inode = dentry->d_inode; | 2174 | const struct inode *inode = dentry->d_inode; |
| @@ -2161,24 +2177,69 @@ void __audit_inode(const char *name, const struct dentry *dentry) | |||
| 2161 | if (!context->in_syscall) | 2177 | if (!context->in_syscall) |
| 2162 | return; | 2178 | return; |
| 2163 | 2179 | ||
| 2180 | if (!name) | ||
| 2181 | goto out_alloc; | ||
| 2182 | |||
| 2183 | #if AUDIT_DEBUG | ||
| 2184 | /* The struct filename _must_ have a populated ->name */ | ||
| 2185 | BUG_ON(!name->name); | ||
| 2186 | #endif | ||
| 2187 | /* | ||
| 2188 | * If we have a pointer to an audit_names entry already, then we can | ||
| 2189 | * just use it directly if the type is correct. | ||
| 2190 | */ | ||
| 2191 | n = name->aname; | ||
| 2192 | if (n) { | ||
| 2193 | if (parent) { | ||
| 2194 | if (n->type == AUDIT_TYPE_PARENT || | ||
| 2195 | n->type == AUDIT_TYPE_UNKNOWN) | ||
| 2196 | goto out; | ||
| 2197 | } else { | ||
| 2198 | if (n->type != AUDIT_TYPE_PARENT) | ||
| 2199 | goto out; | ||
| 2200 | } | ||
| 2201 | } | ||
| 2202 | |||
| 2164 | list_for_each_entry_reverse(n, &context->names_list, list) { | 2203 | list_for_each_entry_reverse(n, &context->names_list, list) { |
| 2165 | if (n->name && (n->name == name)) | 2204 | /* does the name pointer match? */ |
| 2166 | goto out; | 2205 | if (!n->name || n->name->name != name->name) |
| 2206 | continue; | ||
| 2207 | |||
| 2208 | /* match the correct record type */ | ||
| 2209 | if (parent) { | ||
| 2210 | if (n->type == AUDIT_TYPE_PARENT || | ||
| 2211 | n->type == AUDIT_TYPE_UNKNOWN) | ||
| 2212 | goto out; | ||
| 2213 | } else { | ||
| 2214 | if (n->type != AUDIT_TYPE_PARENT) | ||
| 2215 | goto out; | ||
| 2216 | } | ||
| 2167 | } | 2217 | } |
| 2168 | 2218 | ||
| 2169 | /* unable to find the name from a previous getname() */ | 2219 | out_alloc: |
| 2170 | n = audit_alloc_name(context); | 2220 | /* unable to find the name from a previous getname(). Allocate a new |
| 2221 | * anonymous entry. | ||
| 2222 | */ | ||
| 2223 | n = audit_alloc_name(context, AUDIT_TYPE_NORMAL); | ||
| 2171 | if (!n) | 2224 | if (!n) |
| 2172 | return; | 2225 | return; |
| 2173 | out: | 2226 | out: |
| 2227 | if (parent) { | ||
| 2228 | n->name_len = n->name ? parent_len(n->name->name) : AUDIT_NAME_FULL; | ||
| 2229 | n->type = AUDIT_TYPE_PARENT; | ||
| 2230 | } else { | ||
| 2231 | n->name_len = AUDIT_NAME_FULL; | ||
| 2232 | n->type = AUDIT_TYPE_NORMAL; | ||
| 2233 | } | ||
| 2174 | handle_path(dentry); | 2234 | handle_path(dentry); |
| 2175 | audit_copy_inode(n, dentry, inode); | 2235 | audit_copy_inode(n, dentry, inode); |
| 2176 | } | 2236 | } |
| 2177 | 2237 | ||
| 2178 | /** | 2238 | /** |
| 2179 | * audit_inode_child - collect inode info for created/removed objects | 2239 | * __audit_inode_child - collect inode info for created/removed objects |
| 2180 | * @dentry: dentry being audited | ||
| 2181 | * @parent: inode of dentry parent | 2240 | * @parent: inode of dentry parent |
| 2241 | * @dentry: dentry being audited | ||
| 2242 | * @type: AUDIT_TYPE_* value that we're looking for | ||
| 2182 | * | 2243 | * |
| 2183 | * For syscalls that create or remove filesystem objects, audit_inode | 2244 | * For syscalls that create or remove filesystem objects, audit_inode |
| 2184 | * can only collect information for the filesystem object's parent. | 2245 | * can only collect information for the filesystem object's parent. |
| @@ -2188,15 +2249,14 @@ out: | |||
| 2188 | * must be hooked prior, in order to capture the target inode during | 2249 | * must be hooked prior, in order to capture the target inode during |
| 2189 | * unsuccessful attempts. | 2250 | * unsuccessful attempts. |
| 2190 | */ | 2251 | */ |
| 2191 | void __audit_inode_child(const struct dentry *dentry, | 2252 | void __audit_inode_child(const struct inode *parent, |
| 2192 | const struct inode *parent) | 2253 | const struct dentry *dentry, |
| 2254 | const unsigned char type) | ||
| 2193 | { | 2255 | { |
| 2194 | struct audit_context *context = current->audit_context; | 2256 | struct audit_context *context = current->audit_context; |
| 2195 | const char *found_parent = NULL, *found_child = NULL; | ||
| 2196 | const struct inode *inode = dentry->d_inode; | 2257 | const struct inode *inode = dentry->d_inode; |
| 2197 | const char *dname = dentry->d_name.name; | 2258 | const char *dname = dentry->d_name.name; |
| 2198 | struct audit_names *n; | 2259 | struct audit_names *n, *found_parent = NULL, *found_child = NULL; |
| 2199 | int dirlen = 0; | ||
| 2200 | 2260 | ||
| 2201 | if (!context->in_syscall) | 2261 | if (!context->in_syscall) |
| 2202 | return; | 2262 | return; |
| @@ -2204,62 +2264,65 @@ void __audit_inode_child(const struct dentry *dentry, | |||
| 2204 | if (inode) | 2264 | if (inode) |
| 2205 | handle_one(inode); | 2265 | handle_one(inode); |
| 2206 | 2266 | ||
| 2207 | /* parent is more likely, look for it first */ | 2267 | /* look for a parent entry first */ |
| 2208 | list_for_each_entry(n, &context->names_list, list) { | 2268 | list_for_each_entry(n, &context->names_list, list) { |
| 2209 | if (!n->name) | 2269 | if (!n->name || n->type != AUDIT_TYPE_PARENT) |
| 2210 | continue; | 2270 | continue; |
| 2211 | 2271 | ||
| 2212 | if (n->ino == parent->i_ino && | 2272 | if (n->ino == parent->i_ino && |
| 2213 | !audit_compare_dname_path(dname, n->name, &dirlen)) { | 2273 | !audit_compare_dname_path(dname, n->name->name, n->name_len)) { |
| 2214 | n->name_len = dirlen; /* update parent data in place */ | 2274 | found_parent = n; |
| 2215 | found_parent = n->name; | 2275 | break; |
| 2216 | goto add_names; | ||
| 2217 | } | 2276 | } |
| 2218 | } | 2277 | } |
| 2219 | 2278 | ||
| 2220 | /* no matching parent, look for matching child */ | 2279 | /* is there a matching child entry? */ |
| 2221 | list_for_each_entry(n, &context->names_list, list) { | 2280 | list_for_each_entry(n, &context->names_list, list) { |
| 2222 | if (!n->name) | 2281 | /* can only match entries that have a name */ |
| 2282 | if (!n->name || n->type != type) | ||
| 2223 | continue; | 2283 | continue; |
| 2224 | 2284 | ||
| 2225 | /* strcmp() is the more likely scenario */ | 2285 | /* if we found a parent, make sure this one is a child of it */ |
| 2226 | if (!strcmp(dname, n->name) || | 2286 | if (found_parent && (n->name != found_parent->name)) |
| 2227 | !audit_compare_dname_path(dname, n->name, &dirlen)) { | 2287 | continue; |
| 2228 | if (inode) | 2288 | |
| 2229 | audit_copy_inode(n, NULL, inode); | 2289 | if (!strcmp(dname, n->name->name) || |
| 2230 | else | 2290 | !audit_compare_dname_path(dname, n->name->name, |
| 2231 | n->ino = (unsigned long)-1; | 2291 | found_parent ? |
| 2232 | found_child = n->name; | 2292 | found_parent->name_len : |
| 2233 | goto add_names; | 2293 | AUDIT_NAME_FULL)) { |
| 2294 | found_child = n; | ||
| 2295 | break; | ||
| 2234 | } | 2296 | } |
| 2235 | } | 2297 | } |
| 2236 | 2298 | ||
| 2237 | add_names: | ||
| 2238 | if (!found_parent) { | 2299 | if (!found_parent) { |
| 2239 | n = audit_alloc_name(context); | 2300 | /* create a new, "anonymous" parent record */ |
| 2301 | n = audit_alloc_name(context, AUDIT_TYPE_PARENT); | ||
| 2240 | if (!n) | 2302 | if (!n) |
| 2241 | return; | 2303 | return; |
| 2242 | audit_copy_inode(n, NULL, parent); | 2304 | audit_copy_inode(n, NULL, parent); |
| 2243 | } | 2305 | } |
| 2244 | 2306 | ||
| 2245 | if (!found_child) { | 2307 | if (!found_child) { |
| 2246 | n = audit_alloc_name(context); | 2308 | found_child = audit_alloc_name(context, type); |
| 2247 | if (!n) | 2309 | if (!found_child) |
| 2248 | return; | 2310 | return; |
| 2249 | 2311 | ||
| 2250 | /* Re-use the name belonging to the slot for a matching parent | 2312 | /* Re-use the name belonging to the slot for a matching parent |
| 2251 | * directory. All names for this context are relinquished in | 2313 | * directory. All names for this context are relinquished in |
| 2252 | * audit_free_names() */ | 2314 | * audit_free_names() */ |
| 2253 | if (found_parent) { | 2315 | if (found_parent) { |
| 2254 | n->name = found_parent; | 2316 | found_child->name = found_parent->name; |
| 2255 | n->name_len = AUDIT_NAME_FULL; | 2317 | found_child->name_len = AUDIT_NAME_FULL; |
| 2256 | /* don't call __putname() */ | 2318 | /* don't call __putname() */ |
| 2257 | n->name_put = false; | 2319 | found_child->name_put = false; |
| 2258 | } | 2320 | } |
| 2259 | |||
| 2260 | if (inode) | ||
| 2261 | audit_copy_inode(n, NULL, inode); | ||
| 2262 | } | 2321 | } |
| 2322 | if (inode) | ||
| 2323 | audit_copy_inode(found_child, dentry, inode); | ||
| 2324 | else | ||
| 2325 | found_child->ino = (unsigned long)-1; | ||
| 2263 | } | 2326 | } |
| 2264 | EXPORT_SYMBOL_GPL(__audit_inode_child); | 2327 | EXPORT_SYMBOL_GPL(__audit_inode_child); |
| 2265 | 2328 | ||
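With the parent-first argument order and the explicit type, the directory-modification hooks can now state whether the child record they contribute describes a creation or a deletion. A hedged sketch of a call site, modelled on the fsnotify wrappers that invoke this hook (the wrapper name below is illustrative):

    /* Illustrative hook recording a newly created child; AUDIT_TYPE_CHILD_CREATE
     * and AUDIT_TYPE_CHILD_DELETE are the record types this series introduces. */
    static inline void example_fsnotify_create(struct inode *dir, struct dentry *dentry)
    {
            audit_inode_child(dir, dentry, AUDIT_TYPE_CHILD_CREATE);
            /* ... followed by the usual fsnotify event on the parent ... */
    }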
| @@ -2299,14 +2362,14 @@ static atomic_t session_id = ATOMIC_INIT(0); | |||
| 2299 | * | 2362 | * |
| 2300 | * Called (set) from fs/proc/base.c::proc_loginuid_write(). | 2363 | * Called (set) from fs/proc/base.c::proc_loginuid_write(). |
| 2301 | */ | 2364 | */ |
| 2302 | int audit_set_loginuid(uid_t loginuid) | 2365 | int audit_set_loginuid(kuid_t loginuid) |
| 2303 | { | 2366 | { |
| 2304 | struct task_struct *task = current; | 2367 | struct task_struct *task = current; |
| 2305 | struct audit_context *context = task->audit_context; | 2368 | struct audit_context *context = task->audit_context; |
| 2306 | unsigned int sessionid; | 2369 | unsigned int sessionid; |
| 2307 | 2370 | ||
| 2308 | #ifdef CONFIG_AUDIT_LOGINUID_IMMUTABLE | 2371 | #ifdef CONFIG_AUDIT_LOGINUID_IMMUTABLE |
| 2309 | if (task->loginuid != -1) | 2372 | if (uid_valid(task->loginuid)) |
| 2310 | return -EPERM; | 2373 | return -EPERM; |
| 2311 | #else /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */ | 2374 | #else /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */ |
| 2312 | if (!capable(CAP_AUDIT_CONTROL)) | 2375 | if (!capable(CAP_AUDIT_CONTROL)) |
| @@ -2322,8 +2385,10 @@ int audit_set_loginuid(uid_t loginuid) | |||
| 2322 | audit_log_format(ab, "login pid=%d uid=%u " | 2385 | audit_log_format(ab, "login pid=%d uid=%u " |
| 2323 | "old auid=%u new auid=%u" | 2386 | "old auid=%u new auid=%u" |
| 2324 | " old ses=%u new ses=%u", | 2387 | " old ses=%u new ses=%u", |
| 2325 | task->pid, task_uid(task), | 2388 | task->pid, |
| 2326 | task->loginuid, loginuid, | 2389 | from_kuid(&init_user_ns, task_uid(task)), |
| 2390 | from_kuid(&init_user_ns, task->loginuid), | ||
| 2391 | from_kuid(&init_user_ns, loginuid), | ||
| 2327 | task->sessionid, sessionid); | 2392 | task->sessionid, sessionid); |
| 2328 | audit_log_end(ab); | 2393 | audit_log_end(ab); |
| 2329 | } | 2394 | } |
| @@ -2546,12 +2611,12 @@ int __audit_signal_info(int sig, struct task_struct *t) | |||
| 2546 | struct audit_aux_data_pids *axp; | 2611 | struct audit_aux_data_pids *axp; |
| 2547 | struct task_struct *tsk = current; | 2612 | struct task_struct *tsk = current; |
| 2548 | struct audit_context *ctx = tsk->audit_context; | 2613 | struct audit_context *ctx = tsk->audit_context; |
| 2549 | uid_t uid = current_uid(), t_uid = task_uid(t); | 2614 | kuid_t uid = current_uid(), t_uid = task_uid(t); |
| 2550 | 2615 | ||
| 2551 | if (audit_pid && t->tgid == audit_pid) { | 2616 | if (audit_pid && t->tgid == audit_pid) { |
| 2552 | if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) { | 2617 | if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) { |
| 2553 | audit_sig_pid = tsk->pid; | 2618 | audit_sig_pid = tsk->pid; |
| 2554 | if (tsk->loginuid != -1) | 2619 | if (uid_valid(tsk->loginuid)) |
| 2555 | audit_sig_uid = tsk->loginuid; | 2620 | audit_sig_uid = tsk->loginuid; |
| 2556 | else | 2621 | else |
| 2557 | audit_sig_uid = uid; | 2622 | audit_sig_uid = uid; |
| @@ -2672,8 +2737,8 @@ void __audit_mmap_fd(int fd, int flags) | |||
| 2672 | 2737 | ||
| 2673 | static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr) | 2738 | static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr) |
| 2674 | { | 2739 | { |
| 2675 | uid_t auid, uid; | 2740 | kuid_t auid, uid; |
| 2676 | gid_t gid; | 2741 | kgid_t gid; |
| 2677 | unsigned int sessionid; | 2742 | unsigned int sessionid; |
| 2678 | 2743 | ||
| 2679 | auid = audit_get_loginuid(current); | 2744 | auid = audit_get_loginuid(current); |
| @@ -2681,7 +2746,10 @@ static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr) | |||
| 2681 | current_uid_gid(&uid, &gid); | 2746 | current_uid_gid(&uid, &gid); |
| 2682 | 2747 | ||
| 2683 | audit_log_format(ab, "auid=%u uid=%u gid=%u ses=%u", | 2748 | audit_log_format(ab, "auid=%u uid=%u gid=%u ses=%u", |
| 2684 | auid, uid, gid, sessionid); | 2749 | from_kuid(&init_user_ns, auid), |
| 2750 | from_kuid(&init_user_ns, uid), | ||
| 2751 | from_kgid(&init_user_ns, gid), | ||
| 2752 | sessionid); | ||
| 2685 | audit_log_task_context(ab); | 2753 | audit_log_task_context(ab); |
| 2686 | audit_log_format(ab, " pid=%d comm=", current->pid); | 2754 | audit_log_format(ab, " pid=%d comm=", current->pid); |
| 2687 | audit_log_untrustedstring(ab, current->comm); | 2755 | audit_log_untrustedstring(ab, current->comm); |
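Two conventions recur across the auditsc.c hunks above: kuid_t/kgid_t values are translated through init_user_ns whenever they are printed, and the old "-1 means unset" loginuid sentinel becomes uid_valid()/INVALID_UID. A compact sketch of both idioms, using only helpers from <linux/uidgid.h> and <linux/user_namespace.h>:

    #include <linux/uidgid.h>
    #include <linux/user_namespace.h>

    /* Sketch of the two idioms used throughout the audit changes above. */
    static void example_uid_handling(kuid_t loginuid)
    {
            uid_t shown;

            if (!uid_valid(loginuid))                   /* replaces: loginuid != -1 */
                    return;
            shown = from_kuid(&init_user_ns, loginuid); /* the number that ends up in the record */
            (void)shown;
    }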
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 79818507e444..13774b3b39aa 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
| @@ -88,11 +88,12 @@ static DEFINE_MUTEX(cgroup_root_mutex); | |||
| 88 | 88 | ||
| 89 | /* | 89 | /* |
| 90 | * Generate an array of cgroup subsystem pointers. At boot time, this is | 90 | * Generate an array of cgroup subsystem pointers. At boot time, this is |
| 91 | * populated up to CGROUP_BUILTIN_SUBSYS_COUNT, and modular subsystems are | 91 | * populated with the built in subsystems, and modular subsystems are |
| 92 | * registered after that. The mutable section of this array is protected by | 92 | * registered after that. The mutable section of this array is protected by |
| 93 | * cgroup_mutex. | 93 | * cgroup_mutex. |
| 94 | */ | 94 | */ |
| 95 | #define SUBSYS(_x) &_x ## _subsys, | 95 | #define SUBSYS(_x) [_x ## _subsys_id] = &_x ## _subsys, |
| 96 | #define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option) | ||
| 96 | static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = { | 97 | static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = { |
| 97 | #include <linux/cgroup_subsys.h> | 98 | #include <linux/cgroup_subsys.h> |
| 98 | }; | 99 | }; |
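Switching the subsys[] initializer to designated entries keyed by the _subsys_id enum means every controller lands in its own fixed slot, and IS_SUBSYS_ENABLED() keeps the slots of modular controllers NULL until they register. For a built-in controller the expansion is straightforward; for example, with CONFIG_CPUSETS=y the cgroup_subsys.h include emits roughly:

    /* Approximate preprocessor output for one built-in controller (cpuset). */
    static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
            [cpuset_subsys_id] = &cpuset_subsys,
            /* ... one designated entry per enabled controller ... */
    };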
| @@ -111,13 +112,13 @@ struct cgroupfs_root { | |||
| 111 | * The bitmask of subsystems intended to be attached to this | 112 | * The bitmask of subsystems intended to be attached to this |
| 112 | * hierarchy | 113 | * hierarchy |
| 113 | */ | 114 | */ |
| 114 | unsigned long subsys_bits; | 115 | unsigned long subsys_mask; |
| 115 | 116 | ||
| 116 | /* Unique id for this hierarchy. */ | 117 | /* Unique id for this hierarchy. */ |
| 117 | int hierarchy_id; | 118 | int hierarchy_id; |
| 118 | 119 | ||
| 119 | /* The bitmask of subsystems currently attached to this hierarchy */ | 120 | /* The bitmask of subsystems currently attached to this hierarchy */ |
| 120 | unsigned long actual_subsys_bits; | 121 | unsigned long actual_subsys_mask; |
| 121 | 122 | ||
| 122 | /* A list running through the attached subsystems */ | 123 | /* A list running through the attached subsystems */ |
| 123 | struct list_head subsys_list; | 124 | struct list_head subsys_list; |
| @@ -276,7 +277,8 @@ inline int cgroup_is_removed(const struct cgroup *cgrp) | |||
| 276 | 277 | ||
| 277 | /* bits in struct cgroupfs_root flags field */ | 278 | /* bits in struct cgroupfs_root flags field */ |
| 278 | enum { | 279 | enum { |
| 279 | ROOT_NOPREFIX, /* mounted subsystems have no named prefix */ | 280 | ROOT_NOPREFIX, /* mounted subsystems have no named prefix */ |
| 281 | ROOT_XATTR, /* supports extended attributes */ | ||
| 280 | }; | 282 | }; |
| 281 | 283 | ||
| 282 | static int cgroup_is_releasable(const struct cgroup *cgrp) | 284 | static int cgroup_is_releasable(const struct cgroup *cgrp) |
| @@ -556,7 +558,7 @@ static struct css_set *find_existing_css_set( | |||
| 556 | * won't change, so no need for locking. | 558 | * won't change, so no need for locking. |
| 557 | */ | 559 | */ |
| 558 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 560 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
| 559 | if (root->subsys_bits & (1UL << i)) { | 561 | if (root->subsys_mask & (1UL << i)) { |
| 560 | /* Subsystem is in this hierarchy. So we want | 562 | /* Subsystem is in this hierarchy. So we want |
| 561 | * the subsystem state from the new | 563 | * the subsystem state from the new |
| 562 | * cgroup */ | 564 | * cgroup */ |
| @@ -824,7 +826,8 @@ EXPORT_SYMBOL_GPL(cgroup_unlock); | |||
| 824 | static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); | 826 | static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); |
| 825 | static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int); | 827 | static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int); |
| 826 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); | 828 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); |
| 827 | static int cgroup_populate_dir(struct cgroup *cgrp); | 829 | static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files, |
| 830 | unsigned long subsys_mask); | ||
| 828 | static const struct inode_operations cgroup_dir_inode_operations; | 831 | static const struct inode_operations cgroup_dir_inode_operations; |
| 829 | static const struct file_operations proc_cgroupstats_operations; | 832 | static const struct file_operations proc_cgroupstats_operations; |
| 830 | 833 | ||
| @@ -912,15 +915,19 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) | |||
| 912 | */ | 915 | */ |
| 913 | BUG_ON(!list_empty(&cgrp->pidlists)); | 916 | BUG_ON(!list_empty(&cgrp->pidlists)); |
| 914 | 917 | ||
| 918 | simple_xattrs_free(&cgrp->xattrs); | ||
| 919 | |||
| 915 | kfree_rcu(cgrp, rcu_head); | 920 | kfree_rcu(cgrp, rcu_head); |
| 916 | } else { | 921 | } else { |
| 917 | struct cfent *cfe = __d_cfe(dentry); | 922 | struct cfent *cfe = __d_cfe(dentry); |
| 918 | struct cgroup *cgrp = dentry->d_parent->d_fsdata; | 923 | struct cgroup *cgrp = dentry->d_parent->d_fsdata; |
| 924 | struct cftype *cft = cfe->type; | ||
| 919 | 925 | ||
| 920 | WARN_ONCE(!list_empty(&cfe->node) && | 926 | WARN_ONCE(!list_empty(&cfe->node) && |
| 921 | cgrp != &cgrp->root->top_cgroup, | 927 | cgrp != &cgrp->root->top_cgroup, |
| 922 | "cfe still linked for %s\n", cfe->type->name); | 928 | "cfe still linked for %s\n", cfe->type->name); |
| 923 | kfree(cfe); | 929 | kfree(cfe); |
| 930 | simple_xattrs_free(&cft->xattrs); | ||
| 924 | } | 931 | } |
| 925 | iput(inode); | 932 | iput(inode); |
| 926 | } | 933 | } |
| @@ -963,12 +970,29 @@ static int cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) | |||
| 963 | return -ENOENT; | 970 | return -ENOENT; |
| 964 | } | 971 | } |
| 965 | 972 | ||
| 966 | static void cgroup_clear_directory(struct dentry *dir) | 973 | /** |
| 974 | * cgroup_clear_directory - selective removal of base and subsystem files | ||
| 975 | * @dir: directory containing the files | ||
| 976 | * @base_files: true if the base files should be removed | ||
| 977 | * @subsys_mask: mask of the subsystem ids whose files should be removed | ||
| 978 | */ | ||
| 979 | static void cgroup_clear_directory(struct dentry *dir, bool base_files, | ||
| 980 | unsigned long subsys_mask) | ||
| 967 | { | 981 | { |
| 968 | struct cgroup *cgrp = __d_cgrp(dir); | 982 | struct cgroup *cgrp = __d_cgrp(dir); |
| 983 | struct cgroup_subsys *ss; | ||
| 969 | 984 | ||
| 970 | while (!list_empty(&cgrp->files)) | 985 | for_each_subsys(cgrp->root, ss) { |
| 971 | cgroup_rm_file(cgrp, NULL); | 986 | struct cftype_set *set; |
| 987 | if (!test_bit(ss->subsys_id, &subsys_mask)) | ||
| 988 | continue; | ||
| 989 | list_for_each_entry(set, &ss->cftsets, node) | ||
| 990 | cgroup_rm_file(cgrp, set->cfts); | ||
| 991 | } | ||
| 992 | if (base_files) { | ||
| 993 | while (!list_empty(&cgrp->files)) | ||
| 994 | cgroup_rm_file(cgrp, NULL); | ||
| 995 | } | ||
| 972 | } | 996 | } |
| 973 | 997 | ||
| 974 | /* | 998 | /* |
| @@ -977,8 +1001,9 @@ static void cgroup_clear_directory(struct dentry *dir) | |||
| 977 | static void cgroup_d_remove_dir(struct dentry *dentry) | 1001 | static void cgroup_d_remove_dir(struct dentry *dentry) |
| 978 | { | 1002 | { |
| 979 | struct dentry *parent; | 1003 | struct dentry *parent; |
| 1004 | struct cgroupfs_root *root = dentry->d_sb->s_fs_info; | ||
| 980 | 1005 | ||
| 981 | cgroup_clear_directory(dentry); | 1006 | cgroup_clear_directory(dentry, true, root->subsys_mask); |
| 982 | 1007 | ||
| 983 | parent = dentry->d_parent; | 1008 | parent = dentry->d_parent; |
| 984 | spin_lock(&parent->d_lock); | 1009 | spin_lock(&parent->d_lock); |
| @@ -1022,22 +1047,22 @@ void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css) | |||
| 1022 | * returns an error, no reference counts are touched. | 1047 | * returns an error, no reference counts are touched. |
| 1023 | */ | 1048 | */ |
| 1024 | static int rebind_subsystems(struct cgroupfs_root *root, | 1049 | static int rebind_subsystems(struct cgroupfs_root *root, |
| 1025 | unsigned long final_bits) | 1050 | unsigned long final_subsys_mask) |
| 1026 | { | 1051 | { |
| 1027 | unsigned long added_bits, removed_bits; | 1052 | unsigned long added_mask, removed_mask; |
| 1028 | struct cgroup *cgrp = &root->top_cgroup; | 1053 | struct cgroup *cgrp = &root->top_cgroup; |
| 1029 | int i; | 1054 | int i; |
| 1030 | 1055 | ||
| 1031 | BUG_ON(!mutex_is_locked(&cgroup_mutex)); | 1056 | BUG_ON(!mutex_is_locked(&cgroup_mutex)); |
| 1032 | BUG_ON(!mutex_is_locked(&cgroup_root_mutex)); | 1057 | BUG_ON(!mutex_is_locked(&cgroup_root_mutex)); |
| 1033 | 1058 | ||
| 1034 | removed_bits = root->actual_subsys_bits & ~final_bits; | 1059 | removed_mask = root->actual_subsys_mask & ~final_subsys_mask; |
| 1035 | added_bits = final_bits & ~root->actual_subsys_bits; | 1060 | added_mask = final_subsys_mask & ~root->actual_subsys_mask; |
| 1036 | /* Check that any added subsystems are currently free */ | 1061 | /* Check that any added subsystems are currently free */ |
| 1037 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 1062 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
| 1038 | unsigned long bit = 1UL << i; | 1063 | unsigned long bit = 1UL << i; |
| 1039 | struct cgroup_subsys *ss = subsys[i]; | 1064 | struct cgroup_subsys *ss = subsys[i]; |
| 1040 | if (!(bit & added_bits)) | 1065 | if (!(bit & added_mask)) |
| 1041 | continue; | 1066 | continue; |
| 1042 | /* | 1067 | /* |
| 1043 | * Nobody should tell us to do a subsys that doesn't exist: | 1068 | * Nobody should tell us to do a subsys that doesn't exist: |
| @@ -1062,7 +1087,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, | |||
| 1062 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 1087 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
| 1063 | struct cgroup_subsys *ss = subsys[i]; | 1088 | struct cgroup_subsys *ss = subsys[i]; |
| 1064 | unsigned long bit = 1UL << i; | 1089 | unsigned long bit = 1UL << i; |
| 1065 | if (bit & added_bits) { | 1090 | if (bit & added_mask) { |
| 1066 | /* We're binding this subsystem to this hierarchy */ | 1091 | /* We're binding this subsystem to this hierarchy */ |
| 1067 | BUG_ON(ss == NULL); | 1092 | BUG_ON(ss == NULL); |
| 1068 | BUG_ON(cgrp->subsys[i]); | 1093 | BUG_ON(cgrp->subsys[i]); |
| @@ -1075,7 +1100,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, | |||
| 1075 | if (ss->bind) | 1100 | if (ss->bind) |
| 1076 | ss->bind(cgrp); | 1101 | ss->bind(cgrp); |
| 1077 | /* refcount was already taken, and we're keeping it */ | 1102 | /* refcount was already taken, and we're keeping it */ |
| 1078 | } else if (bit & removed_bits) { | 1103 | } else if (bit & removed_mask) { |
| 1079 | /* We're removing this subsystem */ | 1104 | /* We're removing this subsystem */ |
| 1080 | BUG_ON(ss == NULL); | 1105 | BUG_ON(ss == NULL); |
| 1081 | BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]); | 1106 | BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]); |
| @@ -1088,7 +1113,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, | |||
| 1088 | list_move(&ss->sibling, &rootnode.subsys_list); | 1113 | list_move(&ss->sibling, &rootnode.subsys_list); |
| 1089 | /* subsystem is now free - drop reference on module */ | 1114 | /* subsystem is now free - drop reference on module */ |
| 1090 | module_put(ss->module); | 1115 | module_put(ss->module); |
| 1091 | } else if (bit & final_bits) { | 1116 | } else if (bit & final_subsys_mask) { |
| 1092 | /* Subsystem state should already exist */ | 1117 | /* Subsystem state should already exist */ |
| 1093 | BUG_ON(ss == NULL); | 1118 | BUG_ON(ss == NULL); |
| 1094 | BUG_ON(!cgrp->subsys[i]); | 1119 | BUG_ON(!cgrp->subsys[i]); |
| @@ -1105,7 +1130,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, | |||
| 1105 | BUG_ON(cgrp->subsys[i]); | 1130 | BUG_ON(cgrp->subsys[i]); |
| 1106 | } | 1131 | } |
| 1107 | } | 1132 | } |
| 1108 | root->subsys_bits = root->actual_subsys_bits = final_bits; | 1133 | root->subsys_mask = root->actual_subsys_mask = final_subsys_mask; |
| 1109 | synchronize_rcu(); | 1134 | synchronize_rcu(); |
| 1110 | 1135 | ||
| 1111 | return 0; | 1136 | return 0; |
| @@ -1121,6 +1146,8 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry) | |||
| 1121 | seq_printf(seq, ",%s", ss->name); | 1146 | seq_printf(seq, ",%s", ss->name); |
| 1122 | if (test_bit(ROOT_NOPREFIX, &root->flags)) | 1147 | if (test_bit(ROOT_NOPREFIX, &root->flags)) |
| 1123 | seq_puts(seq, ",noprefix"); | 1148 | seq_puts(seq, ",noprefix"); |
| 1149 | if (test_bit(ROOT_XATTR, &root->flags)) | ||
| 1150 | seq_puts(seq, ",xattr"); | ||
| 1124 | if (strlen(root->release_agent_path)) | 1151 | if (strlen(root->release_agent_path)) |
| 1125 | seq_printf(seq, ",release_agent=%s", root->release_agent_path); | 1152 | seq_printf(seq, ",release_agent=%s", root->release_agent_path); |
| 1126 | if (clone_children(&root->top_cgroup)) | 1153 | if (clone_children(&root->top_cgroup)) |
| @@ -1132,7 +1159,7 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry) | |||
| 1132 | } | 1159 | } |
| 1133 | 1160 | ||
| 1134 | struct cgroup_sb_opts { | 1161 | struct cgroup_sb_opts { |
| 1135 | unsigned long subsys_bits; | 1162 | unsigned long subsys_mask; |
| 1136 | unsigned long flags; | 1163 | unsigned long flags; |
| 1137 | char *release_agent; | 1164 | char *release_agent; |
| 1138 | bool clone_children; | 1165 | bool clone_children; |
| @@ -1189,6 +1216,10 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
| 1189 | opts->clone_children = true; | 1216 | opts->clone_children = true; |
| 1190 | continue; | 1217 | continue; |
| 1191 | } | 1218 | } |
| 1219 | if (!strcmp(token, "xattr")) { | ||
| 1220 | set_bit(ROOT_XATTR, &opts->flags); | ||
| 1221 | continue; | ||
| 1222 | } | ||
| 1192 | if (!strncmp(token, "release_agent=", 14)) { | 1223 | if (!strncmp(token, "release_agent=", 14)) { |
| 1193 | /* Specifying two release agents is forbidden */ | 1224 | /* Specifying two release agents is forbidden */ |
| 1194 | if (opts->release_agent) | 1225 | if (opts->release_agent) |
| @@ -1237,7 +1268,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
| 1237 | /* Mutually exclusive option 'all' + subsystem name */ | 1268 | /* Mutually exclusive option 'all' + subsystem name */ |
| 1238 | if (all_ss) | 1269 | if (all_ss) |
| 1239 | return -EINVAL; | 1270 | return -EINVAL; |
| 1240 | set_bit(i, &opts->subsys_bits); | 1271 | set_bit(i, &opts->subsys_mask); |
| 1241 | one_ss = true; | 1272 | one_ss = true; |
| 1242 | 1273 | ||
| 1243 | break; | 1274 | break; |
| @@ -1258,7 +1289,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
| 1258 | continue; | 1289 | continue; |
| 1259 | if (ss->disabled) | 1290 | if (ss->disabled) |
| 1260 | continue; | 1291 | continue; |
| 1261 | set_bit(i, &opts->subsys_bits); | 1292 | set_bit(i, &opts->subsys_mask); |
| 1262 | } | 1293 | } |
| 1263 | } | 1294 | } |
| 1264 | 1295 | ||
| @@ -1270,19 +1301,19 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
| 1270 | * the cpuset subsystem. | 1301 | * the cpuset subsystem. |
| 1271 | */ | 1302 | */ |
| 1272 | if (test_bit(ROOT_NOPREFIX, &opts->flags) && | 1303 | if (test_bit(ROOT_NOPREFIX, &opts->flags) && |
| 1273 | (opts->subsys_bits & mask)) | 1304 | (opts->subsys_mask & mask)) |
| 1274 | return -EINVAL; | 1305 | return -EINVAL; |
| 1275 | 1306 | ||
| 1276 | 1307 | ||
| 1277 | /* Can't specify "none" and some subsystems */ | 1308 | /* Can't specify "none" and some subsystems */ |
| 1278 | if (opts->subsys_bits && opts->none) | 1309 | if (opts->subsys_mask && opts->none) |
| 1279 | return -EINVAL; | 1310 | return -EINVAL; |
| 1280 | 1311 | ||
| 1281 | /* | 1312 | /* |
| 1282 | * We either have to specify by name or by subsystems. (So all | 1313 | * We either have to specify by name or by subsystems. (So all |
| 1283 | * empty hierarchies must have a name). | 1314 | * empty hierarchies must have a name). |
| 1284 | */ | 1315 | */ |
| 1285 | if (!opts->subsys_bits && !opts->name) | 1316 | if (!opts->subsys_mask && !opts->name) |
| 1286 | return -EINVAL; | 1317 | return -EINVAL; |
| 1287 | 1318 | ||
| 1288 | /* | 1319 | /* |
| @@ -1291,10 +1322,10 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
| 1291 | * take duplicate reference counts on a subsystem that's already used, | 1322 | * take duplicate reference counts on a subsystem that's already used, |
| 1292 | * but rebind_subsystems handles this case. | 1323 | * but rebind_subsystems handles this case. |
| 1293 | */ | 1324 | */ |
| 1294 | for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) { | 1325 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
| 1295 | unsigned long bit = 1UL << i; | 1326 | unsigned long bit = 1UL << i; |
| 1296 | 1327 | ||
| 1297 | if (!(bit & opts->subsys_bits)) | 1328 | if (!(bit & opts->subsys_mask)) |
| 1298 | continue; | 1329 | continue; |
| 1299 | if (!try_module_get(subsys[i]->module)) { | 1330 | if (!try_module_get(subsys[i]->module)) { |
| 1300 | module_pin_failed = true; | 1331 | module_pin_failed = true; |
| @@ -1307,11 +1338,11 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
| 1307 | * raced with a module_delete call, and to the user this is | 1338 | * raced with a module_delete call, and to the user this is |
| 1308 | * essentially a "subsystem doesn't exist" case. | 1339 | * essentially a "subsystem doesn't exist" case. |
| 1309 | */ | 1340 | */ |
| 1310 | for (i--; i >= CGROUP_BUILTIN_SUBSYS_COUNT; i--) { | 1341 | for (i--; i >= 0; i--) { |
| 1311 | /* drop refcounts only on the ones we took */ | 1342 | /* drop refcounts only on the ones we took */ |
| 1312 | unsigned long bit = 1UL << i; | 1343 | unsigned long bit = 1UL << i; |
| 1313 | 1344 | ||
| 1314 | if (!(bit & opts->subsys_bits)) | 1345 | if (!(bit & opts->subsys_mask)) |
| 1315 | continue; | 1346 | continue; |
| 1316 | module_put(subsys[i]->module); | 1347 | module_put(subsys[i]->module); |
| 1317 | } | 1348 | } |
| @@ -1321,13 +1352,13 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
| 1321 | return 0; | 1352 | return 0; |
| 1322 | } | 1353 | } |
| 1323 | 1354 | ||
| 1324 | static void drop_parsed_module_refcounts(unsigned long subsys_bits) | 1355 | static void drop_parsed_module_refcounts(unsigned long subsys_mask) |
| 1325 | { | 1356 | { |
| 1326 | int i; | 1357 | int i; |
| 1327 | for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) { | 1358 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
| 1328 | unsigned long bit = 1UL << i; | 1359 | unsigned long bit = 1UL << i; |
| 1329 | 1360 | ||
| 1330 | if (!(bit & subsys_bits)) | 1361 | if (!(bit & subsys_mask)) |
| 1331 | continue; | 1362 | continue; |
| 1332 | module_put(subsys[i]->module); | 1363 | module_put(subsys[i]->module); |
| 1333 | } | 1364 | } |
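The hunks above drop the old split between builtin and modular subsystems and instead walk the whole subsystem range, testing each bit of the parsed mask. For reference, a standalone userspace sketch of that bit-walk (not part of the patch; the subsystem table and count are purely illustrative):

    #include <stdio.h>

    #define SUBSYS_COUNT 8  /* illustrative, not the kernel value */

    static const char *subsys_name[SUBSYS_COUNT] = {
        "cpuset", "cpu", "cpuacct", "memory",
        "devices", "freezer", "blkio", "perf_event"
    };

    /* Visit every subsystem whose bit is set in the mask, the same way
     * drop_parsed_module_refcounts() now scans 0..SUBSYS_COUNT-1. */
    static void for_each_masked_subsys(unsigned long subsys_mask)
    {
        int i;

        for (i = 0; i < SUBSYS_COUNT; i++) {
            unsigned long bit = 1UL << i;

            if (!(bit & subsys_mask))
                continue;
            printf("visit %s\n", subsys_name[i]);
        }
    }

    int main(void)
    {
        for_each_masked_subsys((1UL << 3) | (1UL << 5)); /* memory + freezer */
        return 0;
    }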
| @@ -1339,6 +1370,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
| 1339 | struct cgroupfs_root *root = sb->s_fs_info; | 1370 | struct cgroupfs_root *root = sb->s_fs_info; |
| 1340 | struct cgroup *cgrp = &root->top_cgroup; | 1371 | struct cgroup *cgrp = &root->top_cgroup; |
| 1341 | struct cgroup_sb_opts opts; | 1372 | struct cgroup_sb_opts opts; |
| 1373 | unsigned long added_mask, removed_mask; | ||
| 1342 | 1374 | ||
| 1343 | mutex_lock(&cgrp->dentry->d_inode->i_mutex); | 1375 | mutex_lock(&cgrp->dentry->d_inode->i_mutex); |
| 1344 | mutex_lock(&cgroup_mutex); | 1376 | mutex_lock(&cgroup_mutex); |
| @@ -1350,27 +1382,31 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
| 1350 | goto out_unlock; | 1382 | goto out_unlock; |
| 1351 | 1383 | ||
| 1352 | /* See feature-removal-schedule.txt */ | 1384 | /* See feature-removal-schedule.txt */ |
| 1353 | if (opts.subsys_bits != root->actual_subsys_bits || opts.release_agent) | 1385 | if (opts.subsys_mask != root->actual_subsys_mask || opts.release_agent) |
| 1354 | pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n", | 1386 | pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n", |
| 1355 | task_tgid_nr(current), current->comm); | 1387 | task_tgid_nr(current), current->comm); |
| 1356 | 1388 | ||
| 1389 | added_mask = opts.subsys_mask & ~root->subsys_mask; | ||
| 1390 | removed_mask = root->subsys_mask & ~opts.subsys_mask; | ||
| 1391 | |||
| 1357 | /* Don't allow flags or name to change at remount */ | 1392 | /* Don't allow flags or name to change at remount */ |
| 1358 | if (opts.flags != root->flags || | 1393 | if (opts.flags != root->flags || |
| 1359 | (opts.name && strcmp(opts.name, root->name))) { | 1394 | (opts.name && strcmp(opts.name, root->name))) { |
| 1360 | ret = -EINVAL; | 1395 | ret = -EINVAL; |
| 1361 | drop_parsed_module_refcounts(opts.subsys_bits); | 1396 | drop_parsed_module_refcounts(opts.subsys_mask); |
| 1362 | goto out_unlock; | 1397 | goto out_unlock; |
| 1363 | } | 1398 | } |
| 1364 | 1399 | ||
| 1365 | ret = rebind_subsystems(root, opts.subsys_bits); | 1400 | ret = rebind_subsystems(root, opts.subsys_mask); |
| 1366 | if (ret) { | 1401 | if (ret) { |
| 1367 | drop_parsed_module_refcounts(opts.subsys_bits); | 1402 | drop_parsed_module_refcounts(opts.subsys_mask); |
| 1368 | goto out_unlock; | 1403 | goto out_unlock; |
| 1369 | } | 1404 | } |
| 1370 | 1405 | ||
| 1371 | /* clear out any existing files and repopulate subsystem files */ | 1406 | /* clear out any existing files and repopulate subsystem files */ |
| 1372 | cgroup_clear_directory(cgrp->dentry); | 1407 | cgroup_clear_directory(cgrp->dentry, false, removed_mask); |
| 1373 | cgroup_populate_dir(cgrp); | 1408 | /* re-populate subsystem files */ |
| 1409 | cgroup_populate_dir(cgrp, false, added_mask); | ||
| 1374 | 1410 | ||
| 1375 | if (opts.release_agent) | 1411 | if (opts.release_agent) |
| 1376 | strcpy(root->release_agent_path, opts.release_agent); | 1412 | strcpy(root->release_agent_path, opts.release_agent); |
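The remount path now derives which subsystems to attach and which to detach from two bitmasks before clearing and repopulating the directory. A minimal userspace model of that arithmetic (values are illustrative only):

    #include <stdio.h>

    int main(void)
    {
        /* Which subsystems are bound now vs. requested at remount. */
        unsigned long current_mask = 0x0b;  /* 0b1011 */
        unsigned long wanted_mask  = 0x0e;  /* 0b1110 */

        unsigned long added_mask   = wanted_mask & ~current_mask;  /* bind these   */
        unsigned long removed_mask = current_mask & ~wanted_mask;  /* unbind these */

        printf("added   = %#lx\n", added_mask);    /* 0x4 */
        printf("removed = %#lx\n", removed_mask);  /* 0x1 */
        return 0;
    }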
| @@ -1401,6 +1437,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) | |||
| 1401 | mutex_init(&cgrp->pidlist_mutex); | 1437 | mutex_init(&cgrp->pidlist_mutex); |
| 1402 | INIT_LIST_HEAD(&cgrp->event_list); | 1438 | INIT_LIST_HEAD(&cgrp->event_list); |
| 1403 | spin_lock_init(&cgrp->event_list_lock); | 1439 | spin_lock_init(&cgrp->event_list_lock); |
| 1440 | simple_xattrs_init(&cgrp->xattrs); | ||
| 1404 | } | 1441 | } |
| 1405 | 1442 | ||
| 1406 | static void init_cgroup_root(struct cgroupfs_root *root) | 1443 | static void init_cgroup_root(struct cgroupfs_root *root) |
| @@ -1455,8 +1492,8 @@ static int cgroup_test_super(struct super_block *sb, void *data) | |||
| 1455 | * If we asked for subsystems (or explicitly for no | 1492 | * If we asked for subsystems (or explicitly for no |
| 1456 | * subsystems) then they must match | 1493 | * subsystems) then they must match |
| 1457 | */ | 1494 | */ |
| 1458 | if ((opts->subsys_bits || opts->none) | 1495 | if ((opts->subsys_mask || opts->none) |
| 1459 | && (opts->subsys_bits != root->subsys_bits)) | 1496 | && (opts->subsys_mask != root->subsys_mask)) |
| 1460 | return 0; | 1497 | return 0; |
| 1461 | 1498 | ||
| 1462 | return 1; | 1499 | return 1; |
| @@ -1466,7 +1503,7 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts) | |||
| 1466 | { | 1503 | { |
| 1467 | struct cgroupfs_root *root; | 1504 | struct cgroupfs_root *root; |
| 1468 | 1505 | ||
| 1469 | if (!opts->subsys_bits && !opts->none) | 1506 | if (!opts->subsys_mask && !opts->none) |
| 1470 | return NULL; | 1507 | return NULL; |
| 1471 | 1508 | ||
| 1472 | root = kzalloc(sizeof(*root), GFP_KERNEL); | 1509 | root = kzalloc(sizeof(*root), GFP_KERNEL); |
| @@ -1479,7 +1516,7 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts) | |||
| 1479 | } | 1516 | } |
| 1480 | init_cgroup_root(root); | 1517 | init_cgroup_root(root); |
| 1481 | 1518 | ||
| 1482 | root->subsys_bits = opts->subsys_bits; | 1519 | root->subsys_mask = opts->subsys_mask; |
| 1483 | root->flags = opts->flags; | 1520 | root->flags = opts->flags; |
| 1484 | if (opts->release_agent) | 1521 | if (opts->release_agent) |
| 1485 | strcpy(root->release_agent_path, opts->release_agent); | 1522 | strcpy(root->release_agent_path, opts->release_agent); |
| @@ -1511,7 +1548,7 @@ static int cgroup_set_super(struct super_block *sb, void *data) | |||
| 1511 | if (!opts->new_root) | 1548 | if (!opts->new_root) |
| 1512 | return -EINVAL; | 1549 | return -EINVAL; |
| 1513 | 1550 | ||
| 1514 | BUG_ON(!opts->subsys_bits && !opts->none); | 1551 | BUG_ON(!opts->subsys_mask && !opts->none); |
| 1515 | 1552 | ||
| 1516 | ret = set_anon_super(sb, NULL); | 1553 | ret = set_anon_super(sb, NULL); |
| 1517 | if (ret) | 1554 | if (ret) |
| @@ -1629,7 +1666,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1629 | if (ret) | 1666 | if (ret) |
| 1630 | goto unlock_drop; | 1667 | goto unlock_drop; |
| 1631 | 1668 | ||
| 1632 | ret = rebind_subsystems(root, root->subsys_bits); | 1669 | ret = rebind_subsystems(root, root->subsys_mask); |
| 1633 | if (ret == -EBUSY) { | 1670 | if (ret == -EBUSY) { |
| 1634 | free_cg_links(&tmp_cg_links); | 1671 | free_cg_links(&tmp_cg_links); |
| 1635 | goto unlock_drop; | 1672 | goto unlock_drop; |
| @@ -1669,7 +1706,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1669 | BUG_ON(root->number_of_cgroups != 1); | 1706 | BUG_ON(root->number_of_cgroups != 1); |
| 1670 | 1707 | ||
| 1671 | cred = override_creds(&init_cred); | 1708 | cred = override_creds(&init_cred); |
| 1672 | cgroup_populate_dir(root_cgrp); | 1709 | cgroup_populate_dir(root_cgrp, true, root->subsys_mask); |
| 1673 | revert_creds(cred); | 1710 | revert_creds(cred); |
| 1674 | mutex_unlock(&cgroup_root_mutex); | 1711 | mutex_unlock(&cgroup_root_mutex); |
| 1675 | mutex_unlock(&cgroup_mutex); | 1712 | mutex_unlock(&cgroup_mutex); |
| @@ -1681,7 +1718,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1681 | */ | 1718 | */ |
| 1682 | cgroup_drop_root(opts.new_root); | 1719 | cgroup_drop_root(opts.new_root); |
| 1683 | /* no subsys rebinding, so refcounts don't change */ | 1720 | /* no subsys rebinding, so refcounts don't change */ |
| 1684 | drop_parsed_module_refcounts(opts.subsys_bits); | 1721 | drop_parsed_module_refcounts(opts.subsys_mask); |
| 1685 | } | 1722 | } |
| 1686 | 1723 | ||
| 1687 | kfree(opts.release_agent); | 1724 | kfree(opts.release_agent); |
| @@ -1695,7 +1732,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1695 | drop_new_super: | 1732 | drop_new_super: |
| 1696 | deactivate_locked_super(sb); | 1733 | deactivate_locked_super(sb); |
| 1697 | drop_modules: | 1734 | drop_modules: |
| 1698 | drop_parsed_module_refcounts(opts.subsys_bits); | 1735 | drop_parsed_module_refcounts(opts.subsys_mask); |
| 1699 | out_err: | 1736 | out_err: |
| 1700 | kfree(opts.release_agent); | 1737 | kfree(opts.release_agent); |
| 1701 | kfree(opts.name); | 1738 | kfree(opts.name); |
| @@ -1745,6 +1782,8 @@ static void cgroup_kill_sb(struct super_block *sb) { | |||
| 1745 | mutex_unlock(&cgroup_root_mutex); | 1782 | mutex_unlock(&cgroup_root_mutex); |
| 1746 | mutex_unlock(&cgroup_mutex); | 1783 | mutex_unlock(&cgroup_mutex); |
| 1747 | 1784 | ||
| 1785 | simple_xattrs_free(&cgrp->xattrs); | ||
| 1786 | |||
| 1748 | kill_litter_super(sb); | 1787 | kill_litter_super(sb); |
| 1749 | cgroup_drop_root(root); | 1788 | cgroup_drop_root(root); |
| 1750 | } | 1789 | } |
| @@ -2551,6 +2590,64 @@ static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 2551 | return simple_rename(old_dir, old_dentry, new_dir, new_dentry); | 2590 | return simple_rename(old_dir, old_dentry, new_dir, new_dentry); |
| 2552 | } | 2591 | } |
| 2553 | 2592 | ||
| 2593 | static struct simple_xattrs *__d_xattrs(struct dentry *dentry) | ||
| 2594 | { | ||
| 2595 | if (S_ISDIR(dentry->d_inode->i_mode)) | ||
| 2596 | return &__d_cgrp(dentry)->xattrs; | ||
| 2597 | else | ||
| 2598 | return &__d_cft(dentry)->xattrs; | ||
| 2599 | } | ||
| 2600 | |||
| 2601 | static inline int xattr_enabled(struct dentry *dentry) | ||
| 2602 | { | ||
| 2603 | struct cgroupfs_root *root = dentry->d_sb->s_fs_info; | ||
| 2604 | return test_bit(ROOT_XATTR, &root->flags); | ||
| 2605 | } | ||
| 2606 | |||
| 2607 | static bool is_valid_xattr(const char *name) | ||
| 2608 | { | ||
| 2609 | if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || | ||
| 2610 | !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) | ||
| 2611 | return true; | ||
| 2612 | return false; | ||
| 2613 | } | ||
| 2614 | |||
| 2615 | static int cgroup_setxattr(struct dentry *dentry, const char *name, | ||
| 2616 | const void *val, size_t size, int flags) | ||
| 2617 | { | ||
| 2618 | if (!xattr_enabled(dentry)) | ||
| 2619 | return -EOPNOTSUPP; | ||
| 2620 | if (!is_valid_xattr(name)) | ||
| 2621 | return -EINVAL; | ||
| 2622 | return simple_xattr_set(__d_xattrs(dentry), name, val, size, flags); | ||
| 2623 | } | ||
| 2624 | |||
| 2625 | static int cgroup_removexattr(struct dentry *dentry, const char *name) | ||
| 2626 | { | ||
| 2627 | if (!xattr_enabled(dentry)) | ||
| 2628 | return -EOPNOTSUPP; | ||
| 2629 | if (!is_valid_xattr(name)) | ||
| 2630 | return -EINVAL; | ||
| 2631 | return simple_xattr_remove(__d_xattrs(dentry), name); | ||
| 2632 | } | ||
| 2633 | |||
| 2634 | static ssize_t cgroup_getxattr(struct dentry *dentry, const char *name, | ||
| 2635 | void *buf, size_t size) | ||
| 2636 | { | ||
| 2637 | if (!xattr_enabled(dentry)) | ||
| 2638 | return -EOPNOTSUPP; | ||
| 2639 | if (!is_valid_xattr(name)) | ||
| 2640 | return -EINVAL; | ||
| 2641 | return simple_xattr_get(__d_xattrs(dentry), name, buf, size); | ||
| 2642 | } | ||
| 2643 | |||
| 2644 | static ssize_t cgroup_listxattr(struct dentry *dentry, char *buf, size_t size) | ||
| 2645 | { | ||
| 2646 | if (!xattr_enabled(dentry)) | ||
| 2647 | return -EOPNOTSUPP; | ||
| 2648 | return simple_xattr_list(__d_xattrs(dentry), buf, size); | ||
| 2649 | } | ||
| 2650 | |||
| 2554 | static const struct file_operations cgroup_file_operations = { | 2651 | static const struct file_operations cgroup_file_operations = { |
| 2555 | .read = cgroup_file_read, | 2652 | .read = cgroup_file_read, |
| 2556 | .write = cgroup_file_write, | 2653 | .write = cgroup_file_write, |
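As is_valid_xattr() above shows, cgroup only accepts extended attributes in the trusted.* and security.* namespaces. A self-contained userspace model of that prefix check (the prefix strings mirror the uapi xattr definitions; everything else here is illustrative):

    #include <stdio.h>
    #include <string.h>
    #include <stdbool.h>

    /* Prefix values copied here so the model is self-contained. */
    #define XATTR_TRUSTED_PREFIX  "trusted."
    #define XATTR_SECURITY_PREFIX "security."

    /* Model of is_valid_xattr(): only the trusted.* and security.*
     * namespaces are accepted. */
    static bool is_valid_xattr(const char *name)
    {
        return !strncmp(name, XATTR_TRUSTED_PREFIX, strlen(XATTR_TRUSTED_PREFIX)) ||
               !strncmp(name, XATTR_SECURITY_PREFIX, strlen(XATTR_SECURITY_PREFIX));
    }

    int main(void)
    {
        printf("trusted.foo  -> %d\n", is_valid_xattr("trusted.foo"));   /* 1 */
        printf("user.foo     -> %d\n", is_valid_xattr("user.foo"));      /* 0 */
        printf("security.bar -> %d\n", is_valid_xattr("security.bar"));  /* 1 */
        return 0;
    }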
| @@ -2559,11 +2656,22 @@ static const struct file_operations cgroup_file_operations = { | |||
| 2559 | .release = cgroup_file_release, | 2656 | .release = cgroup_file_release, |
| 2560 | }; | 2657 | }; |
| 2561 | 2658 | ||
| 2659 | static const struct inode_operations cgroup_file_inode_operations = { | ||
| 2660 | .setxattr = cgroup_setxattr, | ||
| 2661 | .getxattr = cgroup_getxattr, | ||
| 2662 | .listxattr = cgroup_listxattr, | ||
| 2663 | .removexattr = cgroup_removexattr, | ||
| 2664 | }; | ||
| 2665 | |||
| 2562 | static const struct inode_operations cgroup_dir_inode_operations = { | 2666 | static const struct inode_operations cgroup_dir_inode_operations = { |
| 2563 | .lookup = cgroup_lookup, | 2667 | .lookup = cgroup_lookup, |
| 2564 | .mkdir = cgroup_mkdir, | 2668 | .mkdir = cgroup_mkdir, |
| 2565 | .rmdir = cgroup_rmdir, | 2669 | .rmdir = cgroup_rmdir, |
| 2566 | .rename = cgroup_rename, | 2670 | .rename = cgroup_rename, |
| 2671 | .setxattr = cgroup_setxattr, | ||
| 2672 | .getxattr = cgroup_getxattr, | ||
| 2673 | .listxattr = cgroup_listxattr, | ||
| 2674 | .removexattr = cgroup_removexattr, | ||
| 2567 | }; | 2675 | }; |
| 2568 | 2676 | ||
| 2569 | static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) | 2677 | static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) |
| @@ -2611,6 +2719,7 @@ static int cgroup_create_file(struct dentry *dentry, umode_t mode, | |||
| 2611 | } else if (S_ISREG(mode)) { | 2719 | } else if (S_ISREG(mode)) { |
| 2612 | inode->i_size = 0; | 2720 | inode->i_size = 0; |
| 2613 | inode->i_fop = &cgroup_file_operations; | 2721 | inode->i_fop = &cgroup_file_operations; |
| 2722 | inode->i_op = &cgroup_file_inode_operations; | ||
| 2614 | } | 2723 | } |
| 2615 | d_instantiate(dentry, inode); | 2724 | d_instantiate(dentry, inode); |
| 2616 | dget(dentry); /* Extra count - pin the dentry in core */ | 2725 | dget(dentry); /* Extra count - pin the dentry in core */ |
| @@ -2671,7 +2780,7 @@ static umode_t cgroup_file_mode(const struct cftype *cft) | |||
| 2671 | } | 2780 | } |
| 2672 | 2781 | ||
| 2673 | static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys, | 2782 | static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys, |
| 2674 | const struct cftype *cft) | 2783 | struct cftype *cft) |
| 2675 | { | 2784 | { |
| 2676 | struct dentry *dir = cgrp->dentry; | 2785 | struct dentry *dir = cgrp->dentry; |
| 2677 | struct cgroup *parent = __d_cgrp(dir); | 2786 | struct cgroup *parent = __d_cgrp(dir); |
| @@ -2681,6 +2790,8 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys, | |||
| 2681 | umode_t mode; | 2790 | umode_t mode; |
| 2682 | char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 }; | 2791 | char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 }; |
| 2683 | 2792 | ||
| 2793 | simple_xattrs_init(&cft->xattrs); | ||
| 2794 | |||
| 2684 | /* does @cft->flags tell us to skip creation on @cgrp? */ | 2795 | /* does @cft->flags tell us to skip creation on @cgrp? */ |
| 2685 | if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent) | 2796 | if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent) |
| 2686 | return 0; | 2797 | return 0; |
| @@ -2721,9 +2832,9 @@ out: | |||
| 2721 | } | 2832 | } |
| 2722 | 2833 | ||
| 2723 | static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, | 2834 | static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, |
| 2724 | const struct cftype cfts[], bool is_add) | 2835 | struct cftype cfts[], bool is_add) |
| 2725 | { | 2836 | { |
| 2726 | const struct cftype *cft; | 2837 | struct cftype *cft; |
| 2727 | int err, ret = 0; | 2838 | int err, ret = 0; |
| 2728 | 2839 | ||
| 2729 | for (cft = cfts; cft->name[0] != '\0'; cft++) { | 2840 | for (cft = cfts; cft->name[0] != '\0'; cft++) { |
| @@ -2757,7 +2868,7 @@ static void cgroup_cfts_prepare(void) | |||
| 2757 | } | 2868 | } |
| 2758 | 2869 | ||
| 2759 | static void cgroup_cfts_commit(struct cgroup_subsys *ss, | 2870 | static void cgroup_cfts_commit(struct cgroup_subsys *ss, |
| 2760 | const struct cftype *cfts, bool is_add) | 2871 | struct cftype *cfts, bool is_add) |
| 2761 | __releases(&cgroup_mutex) __releases(&cgroup_cft_mutex) | 2872 | __releases(&cgroup_mutex) __releases(&cgroup_cft_mutex) |
| 2762 | { | 2873 | { |
| 2763 | LIST_HEAD(pending); | 2874 | LIST_HEAD(pending); |
| @@ -2808,7 +2919,7 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss, | |||
| 2808 | * function currently returns 0 as long as @cfts registration is successful | 2919 | * function currently returns 0 as long as @cfts registration is successful |
| 2809 | * even if some file creation attempts on existing cgroups fail. | 2920 | * even if some file creation attempts on existing cgroups fail. |
| 2810 | */ | 2921 | */ |
| 2811 | int cgroup_add_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts) | 2922 | int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) |
| 2812 | { | 2923 | { |
| 2813 | struct cftype_set *set; | 2924 | struct cftype_set *set; |
| 2814 | 2925 | ||
| @@ -2838,7 +2949,7 @@ EXPORT_SYMBOL_GPL(cgroup_add_cftypes); | |||
| 2838 | * Returns 0 on successful unregistration, -ENOENT if @cfts is not | 2949 | * Returns 0 on successful unregistration, -ENOENT if @cfts is not |
| 2839 | * registered with @ss. | 2950 | * registered with @ss. |
| 2840 | */ | 2951 | */ |
| 2841 | int cgroup_rm_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts) | 2952 | int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) |
| 2842 | { | 2953 | { |
| 2843 | struct cftype_set *set; | 2954 | struct cftype_set *set; |
| 2844 | 2955 | ||
| @@ -3843,18 +3954,29 @@ static struct cftype files[] = { | |||
| 3843 | { } /* terminate */ | 3954 | { } /* terminate */ |
| 3844 | }; | 3955 | }; |
| 3845 | 3956 | ||
| 3846 | static int cgroup_populate_dir(struct cgroup *cgrp) | 3957 | /** |
| 3958 | * cgroup_populate_dir - selective creation of files in a directory | ||
| 3959 | * @cgrp: target cgroup | ||
| 3960 | * @base_files: true if the base files should be added | ||
| 3961 | * @subsys_mask: mask of the subsystem ids whose files should be added | ||
| 3962 | */ | ||
| 3963 | static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files, | ||
| 3964 | unsigned long subsys_mask) | ||
| 3847 | { | 3965 | { |
| 3848 | int err; | 3966 | int err; |
| 3849 | struct cgroup_subsys *ss; | 3967 | struct cgroup_subsys *ss; |
| 3850 | 3968 | ||
| 3851 | err = cgroup_addrm_files(cgrp, NULL, files, true); | 3969 | if (base_files) { |
| 3852 | if (err < 0) | 3970 | err = cgroup_addrm_files(cgrp, NULL, files, true); |
| 3853 | return err; | 3971 | if (err < 0) |
| 3972 | return err; | ||
| 3973 | } | ||
| 3854 | 3974 | ||
| 3855 | /* process cftsets of each subsystem */ | 3975 | /* process cftsets of each subsystem */ |
| 3856 | for_each_subsys(cgrp->root, ss) { | 3976 | for_each_subsys(cgrp->root, ss) { |
| 3857 | struct cftype_set *set; | 3977 | struct cftype_set *set; |
| 3978 | if (!test_bit(ss->subsys_id, &subsys_mask)) | ||
| 3979 | continue; | ||
| 3858 | 3980 | ||
| 3859 | list_for_each_entry(set, &ss->cftsets, node) | 3981 | list_for_each_entry(set, &ss->cftsets, node) |
| 3860 | cgroup_addrm_files(cgrp, ss, set->cfts, true); | 3982 | cgroup_addrm_files(cgrp, ss, set->cfts, true); |
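cgroup_populate_dir() now takes a base_files flag plus a subsystem mask and only creates files for subsystems whose ids are set in that mask. A loose userspace sketch of the selection logic (the subsystem ids and the abbreviated file lists are illustrative, not the kernel tables):

    #include <stdio.h>
    #include <stddef.h>
    #include <stdbool.h>

    struct subsys {
        int id;                 /* bit position in the mask */
        const char *name;
    };

    static const struct subsys subsys_table[] = {
        { 0, "cpuset" }, { 1, "cpu" }, { 2, "memory" },
    };

    static const char *base_files[] = { "tasks", "cgroup.procs", "notify_on_release" };

    /* Model of cgroup_populate_dir(cgrp, base_files, subsys_mask). */
    static void populate_dir(bool add_base_files, unsigned long subsys_mask)
    {
        size_t i;

        if (add_base_files)
            for (i = 0; i < sizeof(base_files) / sizeof(base_files[0]); i++)
                printf("add %s\n", base_files[i]);

        for (i = 0; i < sizeof(subsys_table) / sizeof(subsys_table[0]); i++) {
            if (!(subsys_mask & (1UL << subsys_table[i].id)))
                continue;       /* skip subsystems not in the mask */
            printf("add files for %s\n", subsys_table[i].name);
        }
    }

    int main(void)
    {
        populate_dir(true, 1UL << 2);   /* base files plus "memory" files only */
        return 0;
    }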
| @@ -3954,8 +4076,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
| 3954 | set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); | 4076 | set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); |
| 3955 | 4077 | ||
| 3956 | for_each_subsys(root, ss) { | 4078 | for_each_subsys(root, ss) { |
| 3957 | struct cgroup_subsys_state *css = ss->create(cgrp); | 4079 | struct cgroup_subsys_state *css; |
| 3958 | 4080 | ||
| 4081 | css = ss->create(cgrp); | ||
| 3959 | if (IS_ERR(css)) { | 4082 | if (IS_ERR(css)) { |
| 3960 | err = PTR_ERR(css); | 4083 | err = PTR_ERR(css); |
| 3961 | goto err_destroy; | 4084 | goto err_destroy; |
| @@ -3969,6 +4092,15 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
| 3969 | /* At error, ->destroy() callback has to free assigned ID. */ | 4092 | /* At error, ->destroy() callback has to free assigned ID. */ |
| 3970 | if (clone_children(parent) && ss->post_clone) | 4093 | if (clone_children(parent) && ss->post_clone) |
| 3971 | ss->post_clone(cgrp); | 4094 | ss->post_clone(cgrp); |
| 4095 | |||
| 4096 | if (ss->broken_hierarchy && !ss->warned_broken_hierarchy && | ||
| 4097 | parent->parent) { | ||
| 4098 | pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n", | ||
| 4099 | current->comm, current->pid, ss->name); | ||
| 4100 | if (!strcmp(ss->name, "memory")) | ||
| 4101 | pr_warning("cgroup: \"memory\" requires setting use_hierarchy to 1 on the root.\n"); | ||
| 4102 | ss->warned_broken_hierarchy = true; | ||
| 4103 | } | ||
| 3972 | } | 4104 | } |
| 3973 | 4105 | ||
| 3974 | list_add(&cgrp->sibling, &cgrp->parent->children); | 4106 | list_add(&cgrp->sibling, &cgrp->parent->children); |
| @@ -3988,7 +4120,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
| 3988 | 4120 | ||
| 3989 | list_add_tail(&cgrp->allcg_node, &root->allcg_list); | 4121 | list_add_tail(&cgrp->allcg_node, &root->allcg_list); |
| 3990 | 4122 | ||
| 3991 | err = cgroup_populate_dir(cgrp); | 4123 | err = cgroup_populate_dir(cgrp, true, root->subsys_mask); |
| 3992 | /* If err < 0, we have a half-filled directory - oh well ;) */ | 4124 | /* If err < 0, we have a half-filled directory - oh well ;) */ |
| 3993 | 4125 | ||
| 3994 | mutex_unlock(&cgroup_mutex); | 4126 | mutex_unlock(&cgroup_mutex); |
| @@ -4321,8 +4453,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
| 4321 | * since cgroup_init_subsys will have already taken care of it. | 4453 | * since cgroup_init_subsys will have already taken care of it. |
| 4322 | */ | 4454 | */ |
| 4323 | if (ss->module == NULL) { | 4455 | if (ss->module == NULL) { |
| 4324 | /* a few sanity checks */ | 4456 | /* a sanity check */ |
| 4325 | BUG_ON(ss->subsys_id >= CGROUP_BUILTIN_SUBSYS_COUNT); | ||
| 4326 | BUG_ON(subsys[ss->subsys_id] != ss); | 4457 | BUG_ON(subsys[ss->subsys_id] != ss); |
| 4327 | return 0; | 4458 | return 0; |
| 4328 | } | 4459 | } |
| @@ -4330,24 +4461,8 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
| 4330 | /* init base cftset */ | 4461 | /* init base cftset */ |
| 4331 | cgroup_init_cftsets(ss); | 4462 | cgroup_init_cftsets(ss); |
| 4332 | 4463 | ||
| 4333 | /* | ||
| 4334 | * need to register a subsys id before anything else - for example, | ||
| 4335 | * init_cgroup_css needs it. | ||
| 4336 | */ | ||
| 4337 | mutex_lock(&cgroup_mutex); | 4464 | mutex_lock(&cgroup_mutex); |
| 4338 | /* find the first empty slot in the array */ | 4465 | subsys[ss->subsys_id] = ss; |
| 4339 | for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) { | ||
| 4340 | if (subsys[i] == NULL) | ||
| 4341 | break; | ||
| 4342 | } | ||
| 4343 | if (i == CGROUP_SUBSYS_COUNT) { | ||
| 4344 | /* maximum number of subsystems already registered! */ | ||
| 4345 | mutex_unlock(&cgroup_mutex); | ||
| 4346 | return -EBUSY; | ||
| 4347 | } | ||
| 4348 | /* assign ourselves the subsys_id */ | ||
| 4349 | ss->subsys_id = i; | ||
| 4350 | subsys[i] = ss; | ||
| 4351 | 4466 | ||
| 4352 | /* | 4467 | /* |
| 4353 | * no ss->create seems to need anything important in the ss struct, so | 4468 | * no ss->create seems to need anything important in the ss struct, so |
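With every subsystem id fixed at compile time, cgroup_load_subsys() no longer scans the array for a free slot; it stores the subsystem directly at subsys[ss->subsys_id]. A small userspace sketch contrasting the two registration styles (slot count and entries are made up for illustration):

    #include <stdio.h>

    #define NR_SLOTS 8

    struct subsys {
        int subsys_id;          /* fixed at build time in the patched code */
        const char *name;
    };

    static struct subsys *registry[NR_SLOTS];

    /* Old style: find the first free slot and hand out an id at load time. */
    static int register_scan(struct subsys *ss)
    {
        int i;

        for (i = 0; i < NR_SLOTS; i++)
            if (!registry[i])
                break;
        if (i == NR_SLOTS)
            return -1;          /* table full */
        ss->subsys_id = i;
        registry[i] = ss;
        return 0;
    }

    /* New style: the id is already known, so registration is a single store. */
    static int register_fixed(struct subsys *ss)
    {
        registry[ss->subsys_id] = ss;
        return 0;
    }

    int main(void)
    {
        struct subsys a = { -1, "net_cls" }, b = { 5, "blkio" };

        register_scan(&a);
        register_fixed(&b);
        printf("%s -> slot %d, %s -> slot %d\n",
               a.name, a.subsys_id, b.name, b.subsys_id);
        return 0;
    }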
| @@ -4356,7 +4471,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
| 4356 | css = ss->create(dummytop); | 4471 | css = ss->create(dummytop); |
| 4357 | if (IS_ERR(css)) { | 4472 | if (IS_ERR(css)) { |
| 4358 | /* failure case - need to deassign the subsys[] slot. */ | 4473 | /* failure case - need to deassign the subsys[] slot. */ |
| 4359 | subsys[i] = NULL; | 4474 | subsys[ss->subsys_id] = NULL; |
| 4360 | mutex_unlock(&cgroup_mutex); | 4475 | mutex_unlock(&cgroup_mutex); |
| 4361 | return PTR_ERR(css); | 4476 | return PTR_ERR(css); |
| 4362 | } | 4477 | } |
| @@ -4372,7 +4487,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
| 4372 | if (ret) { | 4487 | if (ret) { |
| 4373 | dummytop->subsys[ss->subsys_id] = NULL; | 4488 | dummytop->subsys[ss->subsys_id] = NULL; |
| 4374 | ss->destroy(dummytop); | 4489 | ss->destroy(dummytop); |
| 4375 | subsys[i] = NULL; | 4490 | subsys[ss->subsys_id] = NULL; |
| 4376 | mutex_unlock(&cgroup_mutex); | 4491 | mutex_unlock(&cgroup_mutex); |
| 4377 | return ret; | 4492 | return ret; |
| 4378 | } | 4493 | } |
| @@ -4439,7 +4554,6 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss) | |||
| 4439 | 4554 | ||
| 4440 | mutex_lock(&cgroup_mutex); | 4555 | mutex_lock(&cgroup_mutex); |
| 4441 | /* deassign the subsys_id */ | 4556 | /* deassign the subsys_id */ |
| 4442 | BUG_ON(ss->subsys_id < CGROUP_BUILTIN_SUBSYS_COUNT); | ||
| 4443 | subsys[ss->subsys_id] = NULL; | 4557 | subsys[ss->subsys_id] = NULL; |
| 4444 | 4558 | ||
| 4445 | /* remove subsystem from rootnode's list of subsystems */ | 4559 | /* remove subsystem from rootnode's list of subsystems */ |
| @@ -4502,10 +4616,13 @@ int __init cgroup_init_early(void) | |||
| 4502 | for (i = 0; i < CSS_SET_TABLE_SIZE; i++) | 4616 | for (i = 0; i < CSS_SET_TABLE_SIZE; i++) |
| 4503 | INIT_HLIST_HEAD(&css_set_table[i]); | 4617 | INIT_HLIST_HEAD(&css_set_table[i]); |
| 4504 | 4618 | ||
| 4505 | /* at bootup time, we don't worry about modular subsystems */ | 4619 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
| 4506 | for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) { | ||
| 4507 | struct cgroup_subsys *ss = subsys[i]; | 4620 | struct cgroup_subsys *ss = subsys[i]; |
| 4508 | 4621 | ||
| 4622 | /* at bootup time, we don't worry about modular subsystems */ | ||
| 4623 | if (!ss || ss->module) | ||
| 4624 | continue; | ||
| 4625 | |||
| 4509 | BUG_ON(!ss->name); | 4626 | BUG_ON(!ss->name); |
| 4510 | BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN); | 4627 | BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN); |
| 4511 | BUG_ON(!ss->create); | 4628 | BUG_ON(!ss->create); |
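Because the subsys[] array now spans modular ids as well, each of the boot-time loops gains the same guard: skip empty slots and skip subsystems backed by a module. A userspace model of that filter (the entries are illustrative):

    #include <stdio.h>
    #include <stddef.h>
    #include <stdbool.h>

    struct subsys {
        const char *name;
        bool module;            /* true if provided by a loadable module */
    };

    static struct subsys cpuset  = { "cpuset",  false };
    static struct subsys net_cls = { "net_cls", true  };

    static struct subsys *subsys[] = { &cpuset, NULL, &net_cls };

    int main(void)
    {
        size_t i;

        for (i = 0; i < sizeof(subsys) / sizeof(subsys[0]); i++) {
            struct subsys *ss = subsys[i];

            /* at boot time, don't worry about modular subsystems */
            if (!ss || ss->module)
                continue;
            printf("init %s\n", ss->name);
        }
        return 0;
    }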
| @@ -4538,9 +4655,12 @@ int __init cgroup_init(void) | |||
| 4538 | if (err) | 4655 | if (err) |
| 4539 | return err; | 4656 | return err; |
| 4540 | 4657 | ||
| 4541 | /* at bootup time, we don't worry about modular subsystems */ | 4658 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
| 4542 | for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) { | ||
| 4543 | struct cgroup_subsys *ss = subsys[i]; | 4659 | struct cgroup_subsys *ss = subsys[i]; |
| 4660 | |||
| 4661 | /* at bootup time, we don't worry about modular subsystems */ | ||
| 4662 | if (!ss || ss->module) | ||
| 4663 | continue; | ||
| 4544 | if (!ss->early_init) | 4664 | if (!ss->early_init) |
| 4545 | cgroup_init_subsys(ss); | 4665 | cgroup_init_subsys(ss); |
| 4546 | if (ss->use_id) | 4666 | if (ss->use_id) |
| @@ -4735,13 +4855,16 @@ void cgroup_fork_callbacks(struct task_struct *child) | |||
| 4735 | { | 4855 | { |
| 4736 | if (need_forkexit_callback) { | 4856 | if (need_forkexit_callback) { |
| 4737 | int i; | 4857 | int i; |
| 4738 | /* | 4858 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
| 4739 | * forkexit callbacks are only supported for builtin | ||
| 4740 | * subsystems, and the builtin section of the subsys array is | ||
| 4741 | * immutable, so we don't need to lock the subsys array here. | ||
| 4742 | */ | ||
| 4743 | for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) { | ||
| 4744 | struct cgroup_subsys *ss = subsys[i]; | 4859 | struct cgroup_subsys *ss = subsys[i]; |
| 4860 | |||
| 4861 | /* | ||
| 4862 | * forkexit callbacks are only supported for | ||
| 4863 | * builtin subsystems. | ||
| 4864 | */ | ||
| 4865 | if (!ss || ss->module) | ||
| 4866 | continue; | ||
| 4867 | |||
| 4745 | if (ss->fork) | 4868 | if (ss->fork) |
| 4746 | ss->fork(child); | 4869 | ss->fork(child); |
| 4747 | } | 4870 | } |
| @@ -4846,12 +4969,13 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) | |||
| 4846 | tsk->cgroups = &init_css_set; | 4969 | tsk->cgroups = &init_css_set; |
| 4847 | 4970 | ||
| 4848 | if (run_callbacks && need_forkexit_callback) { | 4971 | if (run_callbacks && need_forkexit_callback) { |
| 4849 | /* | 4972 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
| 4850 | * modular subsystems can't use callbacks, so no need to lock | ||
| 4851 | * the subsys array | ||
| 4852 | */ | ||
| 4853 | for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) { | ||
| 4854 | struct cgroup_subsys *ss = subsys[i]; | 4973 | struct cgroup_subsys *ss = subsys[i]; |
| 4974 | |||
| 4975 | /* modular subsystems can't use callbacks */ | ||
| 4976 | if (!ss || ss->module) | ||
| 4977 | continue; | ||
| 4978 | |||
| 4855 | if (ss->exit) { | 4979 | if (ss->exit) { |
| 4856 | struct cgroup *old_cgrp = | 4980 | struct cgroup *old_cgrp = |
| 4857 | rcu_dereference_raw(cg->subsys[i])->cgroup; | 4981 | rcu_dereference_raw(cg->subsys[i])->cgroup; |
| @@ -5037,13 +5161,17 @@ static int __init cgroup_disable(char *str) | |||
| 5037 | while ((token = strsep(&str, ",")) != NULL) { | 5161 | while ((token = strsep(&str, ",")) != NULL) { |
| 5038 | if (!*token) | 5162 | if (!*token) |
| 5039 | continue; | 5163 | continue; |
| 5040 | /* | 5164 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
| 5041 | * cgroup_disable, being at boot time, can't know about module | ||
| 5042 | * subsystems, so we don't worry about them. | ||
| 5043 | */ | ||
| 5044 | for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) { | ||
| 5045 | struct cgroup_subsys *ss = subsys[i]; | 5165 | struct cgroup_subsys *ss = subsys[i]; |
| 5046 | 5166 | ||
| 5167 | /* | ||
| 5168 | * cgroup_disable, being at boot time, can't | ||
| 5169 | * know about module subsystems, so we don't | ||
| 5170 | * worry about them. | ||
| 5171 | */ | ||
| 5172 | if (!ss || ss->module) | ||
| 5173 | continue; | ||
| 5174 | |||
| 5047 | if (!strcmp(token, ss->name)) { | 5175 | if (!strcmp(token, ss->name)) { |
| 5048 | ss->disabled = 1; | 5176 | ss->disabled = 1; |
| 5049 | printk(KERN_INFO "Disabling %s control group" | 5177 | printk(KERN_INFO "Disabling %s control group" |
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 3649fc6b3eaa..b1724ce98981 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c | |||
| @@ -373,4 +373,12 @@ struct cgroup_subsys freezer_subsys = { | |||
| 373 | .can_attach = freezer_can_attach, | 373 | .can_attach = freezer_can_attach, |
| 374 | .fork = freezer_fork, | 374 | .fork = freezer_fork, |
| 375 | .base_cftypes = files, | 375 | .base_cftypes = files, |
| 376 | |||
| 377 | /* | ||
| 378 | * freezer subsys doesn't handle hierarchy at all. Frozen state | ||
| 379 | * should be inherited through the hierarchy - if a parent is | ||
| 380 | * frozen, all its children should be frozen. Fix it and remove | ||
| 381 | * the following. | ||
| 382 | */ | ||
| 383 | .broken_hierarchy = true, | ||
| 376 | }; | 384 | }; |
diff --git a/kernel/cpu.c b/kernel/cpu.c index f560598807c1..42bd331ee0ab 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
| @@ -80,6 +80,10 @@ void put_online_cpus(void) | |||
| 80 | if (cpu_hotplug.active_writer == current) | 80 | if (cpu_hotplug.active_writer == current) |
| 81 | return; | 81 | return; |
| 82 | mutex_lock(&cpu_hotplug.lock); | 82 | mutex_lock(&cpu_hotplug.lock); |
| 83 | |||
| 84 | if (WARN_ON(!cpu_hotplug.refcount)) | ||
| 85 | cpu_hotplug.refcount++; /* try to fix things up */ | ||
| 86 | |||
| 83 | if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer)) | 87 | if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer)) |
| 84 | wake_up_process(cpu_hotplug.active_writer); | 88 | wake_up_process(cpu_hotplug.active_writer); |
| 85 | mutex_unlock(&cpu_hotplug.lock); | 89 | mutex_unlock(&cpu_hotplug.lock); |
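put_online_cpus() now catches an unbalanced call: if the refcount is already zero it warns and bumps the counter before the decrement so the code can limp on instead of underflowing. A trivial userspace model of that defensive pattern (no locking shown, and the warning here is just a printf):

    #include <stdio.h>

    static int refcount;

    static void put_ref(void)
    {
        if (refcount == 0) {
            fprintf(stderr, "WARN: put without matching get\n");
            refcount++;         /* try to fix things up */
        }
        if (--refcount == 0)
            printf("last reference dropped\n");
    }

    int main(void)
    {
        refcount = 1;
        put_ref();              /* balanced: drops the last reference */
        put_ref();              /* unbalanced: warns, then recovers */
        return 0;
    }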
diff --git a/kernel/cred.c b/kernel/cred.c index de728ac50d82..48cea3da6d05 100644 --- a/kernel/cred.c +++ b/kernel/cred.c | |||
| @@ -799,9 +799,15 @@ static void dump_invalid_creds(const struct cred *cred, const char *label, | |||
| 799 | atomic_read(&cred->usage), | 799 | atomic_read(&cred->usage), |
| 800 | read_cred_subscribers(cred)); | 800 | read_cred_subscribers(cred)); |
| 801 | printk(KERN_ERR "CRED: ->*uid = { %d,%d,%d,%d }\n", | 801 | printk(KERN_ERR "CRED: ->*uid = { %d,%d,%d,%d }\n", |
| 802 | cred->uid, cred->euid, cred->suid, cred->fsuid); | 802 | from_kuid_munged(&init_user_ns, cred->uid), |
| 803 | from_kuid_munged(&init_user_ns, cred->euid), | ||
| 804 | from_kuid_munged(&init_user_ns, cred->suid), | ||
| 805 | from_kuid_munged(&init_user_ns, cred->fsuid)); | ||
| 803 | printk(KERN_ERR "CRED: ->*gid = { %d,%d,%d,%d }\n", | 806 | printk(KERN_ERR "CRED: ->*gid = { %d,%d,%d,%d }\n", |
| 804 | cred->gid, cred->egid, cred->sgid, cred->fsgid); | 807 | from_kgid_munged(&init_user_ns, cred->gid), |
| 808 | from_kgid_munged(&init_user_ns, cred->egid), | ||
| 809 | from_kgid_munged(&init_user_ns, cred->sgid), | ||
| 810 | from_kgid_munged(&init_user_ns, cred->fsgid)); | ||
| 805 | #ifdef CONFIG_SECURITY | 811 | #ifdef CONFIG_SECURITY |
| 806 | printk(KERN_ERR "CRED: ->security is %p\n", cred->security); | 812 | printk(KERN_ERR "CRED: ->security is %p\n", cred->security); |
| 807 | if ((unsigned long) cred->security >= PAGE_SIZE && | 813 | if ((unsigned long) cred->security >= PAGE_SIZE && |
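The credential dump now converts kernel-internal kuids/kgids back into values meaningful in the initial user namespace; an id with no mapping collapses to the overflow id. A toy userspace model of that conversion (the single contiguous mapping and the 65534 overflow value mirror common defaults, but are assumptions here, not the kernel implementation):

    #include <stdio.h>

    #define OVERFLOW_UID 65534  /* typical kernel.overflowuid default */

    /* Toy model of from_kuid_munged(): map a global kuid into a user
     * namespace that covers [base, base+range), falling back to the
     * overflow uid when there is no mapping. */
    static unsigned int from_kuid_munged_model(unsigned int base,
                                               unsigned int range,
                                               unsigned int kuid)
    {
        if (kuid >= base && kuid < base + range)
            return kuid - base;
        return OVERFLOW_UID;
    }

    int main(void)
    {
        printf("%u\n", from_kuid_munged_model(100000, 65536, 100123)); /* 123 */
        printf("%u\n", from_kuid_munged_model(100000, 65536, 5));      /* 65534 */
        return 0;
    }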
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 0557f24c6bca..9a61738cefc8 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c | |||
| @@ -672,6 +672,10 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs) | |||
| 672 | { | 672 | { |
| 673 | struct kgdb_state kgdb_var; | 673 | struct kgdb_state kgdb_var; |
| 674 | struct kgdb_state *ks = &kgdb_var; | 674 | struct kgdb_state *ks = &kgdb_var; |
| 675 | int ret = 0; | ||
| 676 | |||
| 677 | if (arch_kgdb_ops.enable_nmi) | ||
| 678 | arch_kgdb_ops.enable_nmi(0); | ||
| 675 | 679 | ||
| 676 | ks->cpu = raw_smp_processor_id(); | 680 | ks->cpu = raw_smp_processor_id(); |
| 677 | ks->ex_vector = evector; | 681 | ks->ex_vector = evector; |
| @@ -681,13 +685,33 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs) | |||
| 681 | ks->linux_regs = regs; | 685 | ks->linux_regs = regs; |
| 682 | 686 | ||
| 683 | if (kgdb_reenter_check(ks)) | 687 | if (kgdb_reenter_check(ks)) |
| 684 | return 0; /* Ouch, double exception ! */ | 688 | goto out; /* Ouch, double exception ! */ |
| 685 | if (kgdb_info[ks->cpu].enter_kgdb != 0) | 689 | if (kgdb_info[ks->cpu].enter_kgdb != 0) |
| 686 | return 0; | 690 | goto out; |
| 687 | 691 | ||
| 688 | return kgdb_cpu_enter(ks, regs, DCPU_WANT_MASTER); | 692 | ret = kgdb_cpu_enter(ks, regs, DCPU_WANT_MASTER); |
| 693 | out: | ||
| 694 | if (arch_kgdb_ops.enable_nmi) | ||
| 695 | arch_kgdb_ops.enable_nmi(1); | ||
| 696 | return ret; | ||
| 689 | } | 697 | } |
| 690 | 698 | ||
| 699 | /* | ||
| 700 | * GDB places a breakpoint at this function to know dynamically | ||
| 701 | * loaded objects. It's not defined static so that only one instance with this | ||
| 702 | * name exists in the kernel. | ||
| 703 | */ | ||
| 704 | |||
| 705 | static int module_event(struct notifier_block *self, unsigned long val, | ||
| 706 | void *data) | ||
| 707 | { | ||
| 708 | return 0; | ||
| 709 | } | ||
| 710 | |||
| 711 | static struct notifier_block dbg_module_load_nb = { | ||
| 712 | .notifier_call = module_event, | ||
| 713 | }; | ||
| 714 | |||
| 691 | int kgdb_nmicallback(int cpu, void *regs) | 715 | int kgdb_nmicallback(int cpu, void *regs) |
| 692 | { | 716 | { |
| 693 | #ifdef CONFIG_SMP | 717 | #ifdef CONFIG_SMP |
| @@ -816,6 +840,7 @@ static void kgdb_register_callbacks(void) | |||
| 816 | kgdb_arch_init(); | 840 | kgdb_arch_init(); |
| 817 | if (!dbg_is_early) | 841 | if (!dbg_is_early) |
| 818 | kgdb_arch_late(); | 842 | kgdb_arch_late(); |
| 843 | register_module_notifier(&dbg_module_load_nb); | ||
| 819 | register_reboot_notifier(&dbg_reboot_notifier); | 844 | register_reboot_notifier(&dbg_reboot_notifier); |
| 820 | atomic_notifier_chain_register(&panic_notifier_list, | 845 | atomic_notifier_chain_register(&panic_notifier_list, |
| 821 | &kgdb_panic_event_nb); | 846 | &kgdb_panic_event_nb); |
| @@ -839,6 +864,7 @@ static void kgdb_unregister_callbacks(void) | |||
| 839 | if (kgdb_io_module_registered) { | 864 | if (kgdb_io_module_registered) { |
| 840 | kgdb_io_module_registered = 0; | 865 | kgdb_io_module_registered = 0; |
| 841 | unregister_reboot_notifier(&dbg_reboot_notifier); | 866 | unregister_reboot_notifier(&dbg_reboot_notifier); |
| 867 | unregister_module_notifier(&dbg_module_load_nb); | ||
| 842 | atomic_notifier_chain_unregister(&panic_notifier_list, | 868 | atomic_notifier_chain_unregister(&panic_notifier_list, |
| 843 | &kgdb_panic_event_nb); | 869 | &kgdb_panic_event_nb); |
| 844 | kgdb_arch_exit(); | 870 | kgdb_arch_exit(); |
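kgdb_handle_exception() now masks the debugger NMI for the whole exception window and restores it on every exit path, which is why the early returns became gotos feeding a single "out" label. A compact userspace sketch of that bracket-plus-single-exit shape (the enable_nmi hook is stubbed out here):

    #include <stdio.h>
    #include <stdbool.h>

    static void enable_nmi(bool on)
    {
        printf("NMI %s\n", on ? "enabled" : "disabled");
    }

    static int handle_exception(bool reentered, bool already_inside)
    {
        int ret = 0;

        enable_nmi(false);      /* mask while the handler runs */

        if (reentered)
            goto out;           /* double exception */
        if (already_inside)
            goto out;

        ret = 1;                /* normal handling happened */
    out:
        enable_nmi(true);       /* always restored on the way out */
        return ret;
    }

    int main(void)
    {
        printf("ret=%d\n", handle_exception(false, false));
        printf("ret=%d\n", handle_exception(true, false));
        return 0;
    }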
diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c index 07c9bbb94a0b..b03e0e814e43 100644 --- a/kernel/debug/kdb/kdb_bt.c +++ b/kernel/debug/kdb/kdb_bt.c | |||
| @@ -129,6 +129,8 @@ kdb_bt(int argc, const char **argv) | |||
| 129 | } | 129 | } |
| 130 | /* Now the inactive tasks */ | 130 | /* Now the inactive tasks */ |
| 131 | kdb_do_each_thread(g, p) { | 131 | kdb_do_each_thread(g, p) { |
| 132 | if (KDB_FLAG(CMD_INTERRUPT)) | ||
| 133 | return 0; | ||
| 132 | if (task_curr(p)) | 134 | if (task_curr(p)) |
| 133 | continue; | 135 | continue; |
| 134 | if (kdb_bt1(p, mask, argcount, btaprompt)) | 136 | if (kdb_bt1(p, mask, argcount, btaprompt)) |
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index 0a69d2adc4f3..14ff4849262c 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c | |||
| @@ -552,6 +552,7 @@ int vkdb_printf(const char *fmt, va_list ap) | |||
| 552 | { | 552 | { |
| 553 | int diag; | 553 | int diag; |
| 554 | int linecount; | 554 | int linecount; |
| 555 | int colcount; | ||
| 555 | int logging, saved_loglevel = 0; | 556 | int logging, saved_loglevel = 0; |
| 556 | int saved_trap_printk; | 557 | int saved_trap_printk; |
| 557 | int got_printf_lock = 0; | 558 | int got_printf_lock = 0; |
| @@ -584,6 +585,10 @@ int vkdb_printf(const char *fmt, va_list ap) | |||
| 584 | if (diag || linecount <= 1) | 585 | if (diag || linecount <= 1) |
| 585 | linecount = 24; | 586 | linecount = 24; |
| 586 | 587 | ||
| 588 | diag = kdbgetintenv("COLUMNS", &colcount); | ||
| 589 | if (diag || colcount <= 1) | ||
| 590 | colcount = 80; | ||
| 591 | |||
| 587 | diag = kdbgetintenv("LOGGING", &logging); | 592 | diag = kdbgetintenv("LOGGING", &logging); |
| 588 | if (diag) | 593 | if (diag) |
| 589 | logging = 0; | 594 | logging = 0; |
| @@ -690,7 +695,7 @@ kdb_printit: | |||
| 690 | gdbstub_msg_write(kdb_buffer, retlen); | 695 | gdbstub_msg_write(kdb_buffer, retlen); |
| 691 | } else { | 696 | } else { |
| 692 | if (dbg_io_ops && !dbg_io_ops->is_console) { | 697 | if (dbg_io_ops && !dbg_io_ops->is_console) { |
| 693 | len = strlen(kdb_buffer); | 698 | len = retlen; |
| 694 | cp = kdb_buffer; | 699 | cp = kdb_buffer; |
| 695 | while (len--) { | 700 | while (len--) { |
| 696 | dbg_io_ops->write_char(*cp); | 701 | dbg_io_ops->write_char(*cp); |
| @@ -709,11 +714,29 @@ kdb_printit: | |||
| 709 | printk(KERN_INFO "%s", kdb_buffer); | 714 | printk(KERN_INFO "%s", kdb_buffer); |
| 710 | } | 715 | } |
| 711 | 716 | ||
| 712 | if (KDB_STATE(PAGER) && strchr(kdb_buffer, '\n')) | 717 | if (KDB_STATE(PAGER)) { |
| 713 | kdb_nextline++; | 718 | /* |
| 719 | * Check printed string to decide how to bump the | ||
| 720 | * kdb_nextline to control when the more prompt should | ||
| 721 | * show up. | ||
| 722 | */ | ||
| 723 | int got = 0; | ||
| 724 | len = retlen; | ||
| 725 | while (len--) { | ||
| 726 | if (kdb_buffer[len] == '\n') { | ||
| 727 | kdb_nextline++; | ||
| 728 | got = 0; | ||
| 729 | } else if (kdb_buffer[len] == '\r') { | ||
| 730 | got = 0; | ||
| 731 | } else { | ||
| 732 | got++; | ||
| 733 | } | ||
| 734 | } | ||
| 735 | kdb_nextline += got / (colcount + 1); | ||
| 736 | } | ||
| 714 | 737 | ||
| 715 | /* check for having reached the LINES number of printed lines */ | 738 | /* check for having reached the LINES number of printed lines */ |
| 716 | if (kdb_nextline == linecount) { | 739 | if (kdb_nextline >= linecount) { |
| 717 | char buf1[16] = ""; | 740 | char buf1[16] = ""; |
| 718 | 741 | ||
| 719 | /* Watch out for recursion here. Any routine that calls | 742 | /* Watch out for recursion here. Any routine that calls |
| @@ -765,7 +788,7 @@ kdb_printit: | |||
| 765 | kdb_grepping_flag = 0; | 788 | kdb_grepping_flag = 0; |
| 766 | kdb_printf("\n"); | 789 | kdb_printf("\n"); |
| 767 | } else if (buf1[0] == ' ') { | 790 | } else if (buf1[0] == ' ') { |
| 768 | kdb_printf("\n"); | 791 | kdb_printf("\r"); |
| 769 | suspend_grep = 1; /* for this recursion */ | 792 | suspend_grep = 1; /* for this recursion */ |
| 770 | } else if (buf1[0] == '\n') { | 793 | } else if (buf1[0] == '\n') { |
| 771 | kdb_nextline = linecount - 1; | 794 | kdb_nextline = linecount - 1; |
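vkdb_printf() now also reads COLUMNS so the pager can account for long lines that wrap: roughly, each newline consumes a row and every further colcount+1 characters of an unbroken run consumes another. A simplified userspace model of that accounting (it scans forward and ignores '\r', so it approximates the kernel loop rather than copying it):

    #include <stdio.h>
    #include <string.h>

    /* Estimate how many pager rows a chunk of output consumes on a
     * terminal with 'colcount' columns. */
    static int rows_consumed(const char *buf, int colcount)
    {
        int rows = 0, run = 0;
        size_t i, len = strlen(buf);

        for (i = 0; i < len; i++) {
            if (buf[i] == '\n') {
                rows += 1 + run / (colcount + 1);   /* the line plus any wraps */
                run = 0;
            } else {
                run++;
            }
        }
        return rows + run / (colcount + 1);         /* trailing partial line */
    }

    int main(void)
    {
        char line[200];

        memset(line, 'x', sizeof(line) - 2);
        line[sizeof(line) - 2] = '\n';
        line[sizeof(line) - 1] = '\0';

        printf("%d\n", rows_consumed("short\n", 80));   /* 1 */
        printf("%d\n", rows_consumed(line, 80));        /* 3: 198 chars wrap twice */
        return 0;
    }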
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 31df1706b9a9..4d5f8d5612f3 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include <linux/smp.h> | 21 | #include <linux/smp.h> |
| 22 | #include <linux/utsname.h> | 22 | #include <linux/utsname.h> |
| 23 | #include <linux/vmalloc.h> | 23 | #include <linux/vmalloc.h> |
| 24 | #include <linux/atomic.h> | ||
| 24 | #include <linux/module.h> | 25 | #include <linux/module.h> |
| 25 | #include <linux/mm.h> | 26 | #include <linux/mm.h> |
| 26 | #include <linux/init.h> | 27 | #include <linux/init.h> |
| @@ -2100,6 +2101,8 @@ static int kdb_dmesg(int argc, const char **argv) | |||
| 2100 | } | 2101 | } |
| 2101 | if (!lines--) | 2102 | if (!lines--) |
| 2102 | break; | 2103 | break; |
| 2104 | if (KDB_FLAG(CMD_INTERRUPT)) | ||
| 2105 | return 0; | ||
| 2103 | 2106 | ||
| 2104 | kdb_printf("%.*s\n", (int)len - 1, buf); | 2107 | kdb_printf("%.*s\n", (int)len - 1, buf); |
| 2105 | } | 2108 | } |
| @@ -2107,6 +2110,32 @@ static int kdb_dmesg(int argc, const char **argv) | |||
| 2107 | return 0; | 2110 | return 0; |
| 2108 | } | 2111 | } |
| 2109 | #endif /* CONFIG_PRINTK */ | 2112 | #endif /* CONFIG_PRINTK */ |
| 2113 | |||
| 2114 | /* Make sure we balance enable/disable calls, must disable first. */ | ||
| 2115 | static atomic_t kdb_nmi_disabled; | ||
| 2116 | |||
| 2117 | static int kdb_disable_nmi(int argc, const char *argv[]) | ||
| 2118 | { | ||
| 2119 | if (atomic_read(&kdb_nmi_disabled)) | ||
| 2120 | return 0; | ||
| 2121 | atomic_set(&kdb_nmi_disabled, 1); | ||
| 2122 | arch_kgdb_ops.enable_nmi(0); | ||
| 2123 | return 0; | ||
| 2124 | } | ||
| 2125 | |||
| 2126 | static int kdb_param_enable_nmi(const char *val, const struct kernel_param *kp) | ||
| 2127 | { | ||
| 2128 | if (!atomic_add_unless(&kdb_nmi_disabled, -1, 0)) | ||
| 2129 | return -EINVAL; | ||
| 2130 | arch_kgdb_ops.enable_nmi(1); | ||
| 2131 | return 0; | ||
| 2132 | } | ||
| 2133 | |||
| 2134 | static const struct kernel_param_ops kdb_param_ops_enable_nmi = { | ||
| 2135 | .set = kdb_param_enable_nmi, | ||
| 2136 | }; | ||
| 2137 | module_param_cb(enable_nmi, &kdb_param_ops_enable_nmi, NULL, 0600); | ||
| 2138 | |||
| 2110 | /* | 2139 | /* |
| 2111 | * kdb_cpu - This function implements the 'cpu' command. | 2140 | * kdb_cpu - This function implements the 'cpu' command. |
| 2112 | * cpu [<cpunum>] | 2141 | * cpu [<cpunum>] |
| @@ -2851,6 +2880,10 @@ static void __init kdb_inittab(void) | |||
| 2851 | kdb_register_repeat("dmesg", kdb_dmesg, "[lines]", | 2880 | kdb_register_repeat("dmesg", kdb_dmesg, "[lines]", |
| 2852 | "Display syslog buffer", 0, KDB_REPEAT_NONE); | 2881 | "Display syslog buffer", 0, KDB_REPEAT_NONE); |
| 2853 | #endif | 2882 | #endif |
| 2883 | if (arch_kgdb_ops.enable_nmi) { | ||
| 2884 | kdb_register_repeat("disable_nmi", kdb_disable_nmi, "", | ||
| 2885 | "Disable NMI entry to KDB", 0, KDB_REPEAT_NONE); | ||
| 2886 | } | ||
| 2854 | kdb_register_repeat("defcmd", kdb_defcmd, "name \"usage\" \"help\"", | 2887 | kdb_register_repeat("defcmd", kdb_defcmd, "name \"usage\" \"help\"", |
| 2855 | "Define a set of commands, down to endefcmd", 0, KDB_REPEAT_NONE); | 2888 | "Define a set of commands, down to endefcmd", 0, KDB_REPEAT_NONE); |
| 2856 | kdb_register_repeat("kill", kdb_kill, "<-signal> <pid>", | 2889 | kdb_register_repeat("kill", kdb_kill, "<-signal> <pid>", |
diff --git a/kernel/events/core.c b/kernel/events/core.c index fd15593c7f54..dbccf83c134d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
| @@ -471,14 +471,13 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event, | |||
| 471 | { | 471 | { |
| 472 | struct perf_cgroup *cgrp; | 472 | struct perf_cgroup *cgrp; |
| 473 | struct cgroup_subsys_state *css; | 473 | struct cgroup_subsys_state *css; |
| 474 | struct file *file; | 474 | struct fd f = fdget(fd); |
| 475 | int ret = 0, fput_needed; | 475 | int ret = 0; |
| 476 | 476 | ||
| 477 | file = fget_light(fd, &fput_needed); | 477 | if (!f.file) |
| 478 | if (!file) | ||
| 479 | return -EBADF; | 478 | return -EBADF; |
| 480 | 479 | ||
| 481 | css = cgroup_css_from_dir(file, perf_subsys_id); | 480 | css = cgroup_css_from_dir(f.file, perf_subsys_id); |
| 482 | if (IS_ERR(css)) { | 481 | if (IS_ERR(css)) { |
| 483 | ret = PTR_ERR(css); | 482 | ret = PTR_ERR(css); |
| 484 | goto out; | 483 | goto out; |
| @@ -504,7 +503,7 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event, | |||
| 504 | ret = -EINVAL; | 503 | ret = -EINVAL; |
| 505 | } | 504 | } |
| 506 | out: | 505 | out: |
| 507 | fput_light(file, fput_needed); | 506 | fdput(f); |
| 508 | return ret; | 507 | return ret; |
| 509 | } | 508 | } |
| 510 | 509 | ||
| @@ -3237,21 +3236,18 @@ unlock: | |||
| 3237 | 3236 | ||
| 3238 | static const struct file_operations perf_fops; | 3237 | static const struct file_operations perf_fops; |
| 3239 | 3238 | ||
| 3240 | static struct file *perf_fget_light(int fd, int *fput_needed) | 3239 | static inline int perf_fget_light(int fd, struct fd *p) |
| 3241 | { | 3240 | { |
| 3242 | struct file *file; | 3241 | struct fd f = fdget(fd); |
| 3243 | 3242 | if (!f.file) | |
| 3244 | file = fget_light(fd, fput_needed); | 3243 | return -EBADF; |
| 3245 | if (!file) | ||
| 3246 | return ERR_PTR(-EBADF); | ||
| 3247 | 3244 | ||
| 3248 | if (file->f_op != &perf_fops) { | 3245 | if (f.file->f_op != &perf_fops) { |
| 3249 | fput_light(file, *fput_needed); | 3246 | fdput(f); |
| 3250 | *fput_needed = 0; | 3247 | return -EBADF; |
| 3251 | return ERR_PTR(-EBADF); | ||
| 3252 | } | 3248 | } |
| 3253 | 3249 | *p = f; | |
| 3254 | return file; | 3250 | return 0; |
| 3255 | } | 3251 | } |
| 3256 | 3252 | ||
| 3257 | static int perf_event_set_output(struct perf_event *event, | 3253 | static int perf_event_set_output(struct perf_event *event, |
| @@ -3283,22 +3279,19 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
| 3283 | 3279 | ||
| 3284 | case PERF_EVENT_IOC_SET_OUTPUT: | 3280 | case PERF_EVENT_IOC_SET_OUTPUT: |
| 3285 | { | 3281 | { |
| 3286 | struct file *output_file = NULL; | ||
| 3287 | struct perf_event *output_event = NULL; | ||
| 3288 | int fput_needed = 0; | ||
| 3289 | int ret; | 3282 | int ret; |
| 3290 | |||
| 3291 | if (arg != -1) { | 3283 | if (arg != -1) { |
| 3292 | output_file = perf_fget_light(arg, &fput_needed); | 3284 | struct perf_event *output_event; |
| 3293 | if (IS_ERR(output_file)) | 3285 | struct fd output; |
| 3294 | return PTR_ERR(output_file); | 3286 | ret = perf_fget_light(arg, &output); |
| 3295 | output_event = output_file->private_data; | 3287 | if (ret) |
| 3288 | return ret; | ||
| 3289 | output_event = output.file->private_data; | ||
| 3290 | ret = perf_event_set_output(event, output_event); | ||
| 3291 | fdput(output); | ||
| 3292 | } else { | ||
| 3293 | ret = perf_event_set_output(event, NULL); | ||
| 3296 | } | 3294 | } |
| 3297 | |||
| 3298 | ret = perf_event_set_output(event, output_event); | ||
| 3299 | if (output_event) | ||
| 3300 | fput_light(output_file, fput_needed); | ||
| 3301 | |||
| 3302 | return ret; | 3295 | return ret; |
| 3303 | } | 3296 | } |
| 3304 | 3297 | ||
| @@ -3681,7 +3674,7 @@ unlock: | |||
| 3681 | atomic_inc(&event->mmap_count); | 3674 | atomic_inc(&event->mmap_count); |
| 3682 | mutex_unlock(&event->mmap_mutex); | 3675 | mutex_unlock(&event->mmap_mutex); |
| 3683 | 3676 | ||
| 3684 | vma->vm_flags |= VM_RESERVED; | 3677 | vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; |
| 3685 | vma->vm_ops = &perf_mmap_vmops; | 3678 | vma->vm_ops = &perf_mmap_vmops; |
| 3686 | 3679 | ||
| 3687 | return ret; | 3680 | return ret; |
| @@ -6446,12 +6439,11 @@ SYSCALL_DEFINE5(perf_event_open, | |||
| 6446 | struct perf_event_attr attr; | 6439 | struct perf_event_attr attr; |
| 6447 | struct perf_event_context *ctx; | 6440 | struct perf_event_context *ctx; |
| 6448 | struct file *event_file = NULL; | 6441 | struct file *event_file = NULL; |
| 6449 | struct file *group_file = NULL; | 6442 | struct fd group = {NULL, 0}; |
| 6450 | struct task_struct *task = NULL; | 6443 | struct task_struct *task = NULL; |
| 6451 | struct pmu *pmu; | 6444 | struct pmu *pmu; |
| 6452 | int event_fd; | 6445 | int event_fd; |
| 6453 | int move_group = 0; | 6446 | int move_group = 0; |
| 6454 | int fput_needed = 0; | ||
| 6455 | int err; | 6447 | int err; |
| 6456 | 6448 | ||
| 6457 | /* for future expandability... */ | 6449 | /* for future expandability... */ |
| @@ -6481,17 +6473,15 @@ SYSCALL_DEFINE5(perf_event_open, | |||
| 6481 | if ((flags & PERF_FLAG_PID_CGROUP) && (pid == -1 || cpu == -1)) | 6473 | if ((flags & PERF_FLAG_PID_CGROUP) && (pid == -1 || cpu == -1)) |
| 6482 | return -EINVAL; | 6474 | return -EINVAL; |
| 6483 | 6475 | ||
| 6484 | event_fd = get_unused_fd_flags(O_RDWR); | 6476 | event_fd = get_unused_fd(); |
| 6485 | if (event_fd < 0) | 6477 | if (event_fd < 0) |
| 6486 | return event_fd; | 6478 | return event_fd; |
| 6487 | 6479 | ||
| 6488 | if (group_fd != -1) { | 6480 | if (group_fd != -1) { |
| 6489 | group_file = perf_fget_light(group_fd, &fput_needed); | 6481 | err = perf_fget_light(group_fd, &group); |
| 6490 | if (IS_ERR(group_file)) { | 6482 | if (err) |
| 6491 | err = PTR_ERR(group_file); | ||
| 6492 | goto err_fd; | 6483 | goto err_fd; |
| 6493 | } | 6484 | group_leader = group.file->private_data; |
| 6494 | group_leader = group_file->private_data; | ||
| 6495 | if (flags & PERF_FLAG_FD_OUTPUT) | 6485 | if (flags & PERF_FLAG_FD_OUTPUT) |
| 6496 | output_event = group_leader; | 6486 | output_event = group_leader; |
| 6497 | if (flags & PERF_FLAG_FD_NO_GROUP) | 6487 | if (flags & PERF_FLAG_FD_NO_GROUP) |
| @@ -6667,7 +6657,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
| 6667 | * of the group leader will find the pointer to itself in | 6657 | * of the group leader will find the pointer to itself in |
| 6668 | * perf_group_detach(). | 6658 | * perf_group_detach(). |
| 6669 | */ | 6659 | */ |
| 6670 | fput_light(group_file, fput_needed); | 6660 | fdput(group); |
| 6671 | fd_install(event_fd, event_file); | 6661 | fd_install(event_fd, event_file); |
| 6672 | return event_fd; | 6662 | return event_fd; |
| 6673 | 6663 | ||
| @@ -6681,7 +6671,7 @@ err_task: | |||
| 6681 | if (task) | 6671 | if (task) |
| 6682 | put_task_struct(task); | 6672 | put_task_struct(task); |
| 6683 | err_group_fd: | 6673 | err_group_fd: |
| 6684 | fput_light(group_file, fput_needed); | 6674 | fdput(group); |
| 6685 | err_fd: | 6675 | err_fd: |
| 6686 | put_unused_fd(event_fd); | 6676 | put_unused_fd(event_fd); |
| 6687 | return err; | 6677 | return err; |
| @@ -7506,5 +7496,12 @@ struct cgroup_subsys perf_subsys = { | |||
| 7506 | .destroy = perf_cgroup_destroy, | 7496 | .destroy = perf_cgroup_destroy, |
| 7507 | .exit = perf_cgroup_exit, | 7497 | .exit = perf_cgroup_exit, |
| 7508 | .attach = perf_cgroup_attach, | 7498 | .attach = perf_cgroup_attach, |
| 7499 | |||
| 7500 | /* | ||
| 7501 | * perf_event cgroup doesn't handle nesting correctly. | ||
| 7502 | * ctx->nr_cgroups adjustments should be propagated through the | ||
| 7503 | * cgroup hierarchy. Fix it and remove the following. | ||
| 7504 | */ | ||
| 7505 | .broken_hierarchy = true, | ||
| 7509 | }; | 7506 | }; |
| 7510 | #endif /* CONFIG_CGROUP_PERF */ | 7507 | #endif /* CONFIG_CGROUP_PERF */ |
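perf's descriptor handling above moves from the fget_light()/fput_light() pair, which returned a file pointer plus a separate fput_needed flag, to the struct fd based fdget()/fdput() API that carries both in one value. A rough userspace analogy of why the bundled handle is easier to thread through error paths (the types and names here are stand-ins, not the kernel API):

    #include <stdio.h>

    /* Userspace stand-in for a bundled handle: the resource and the
     * "do I need to release it?" state travel together. */
    struct handle {
        FILE *file;
        int needs_close;
    };

    static struct handle handle_get(const char *path)
    {
        struct handle h = { fopen(path, "r"), 0 };

        h.needs_close = (h.file != NULL);
        return h;
    }

    static void handle_put(struct handle h)
    {
        if (h.needs_close)
            fclose(h.file);     /* safe to call unconditionally on the bundle */
    }

    int main(void)
    {
        struct handle h = handle_get("/etc/hostname");

        if (!h.file) {
            fprintf(stderr, "no file\n");
            return 1;
        }
        printf("got a handle\n");
        handle_put(h);          /* one call, no separate flag to remember */
        return 0;
    }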
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 912ef48d28ab..98256bc71ee1 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c | |||
| @@ -141,10 +141,14 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, | |||
| 141 | spinlock_t *ptl; | 141 | spinlock_t *ptl; |
| 142 | pte_t *ptep; | 142 | pte_t *ptep; |
| 143 | int err; | 143 | int err; |
| 144 | /* For mmu_notifiers */ | ||
| 145 | const unsigned long mmun_start = addr; | ||
| 146 | const unsigned long mmun_end = addr + PAGE_SIZE; | ||
| 144 | 147 | ||
| 145 | /* For try_to_free_swap() and munlock_vma_page() below */ | 148 | /* For try_to_free_swap() and munlock_vma_page() below */ |
| 146 | lock_page(page); | 149 | lock_page(page); |
| 147 | 150 | ||
| 151 | mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); | ||
| 148 | err = -EAGAIN; | 152 | err = -EAGAIN; |
| 149 | ptep = page_check_address(page, mm, addr, &ptl, 0); | 153 | ptep = page_check_address(page, mm, addr, &ptl, 0); |
| 150 | if (!ptep) | 154 | if (!ptep) |
| @@ -173,6 +177,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, | |||
| 173 | 177 | ||
| 174 | err = 0; | 178 | err = 0; |
| 175 | unlock: | 179 | unlock: |
| 180 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); | ||
| 176 | unlock_page(page); | 181 | unlock_page(page); |
| 177 | return err; | 182 | return err; |
| 178 | } | 183 | } |
| @@ -735,7 +740,6 @@ static struct map_info * | |||
| 735 | build_map_info(struct address_space *mapping, loff_t offset, bool is_register) | 740 | build_map_info(struct address_space *mapping, loff_t offset, bool is_register) |
| 736 | { | 741 | { |
| 737 | unsigned long pgoff = offset >> PAGE_SHIFT; | 742 | unsigned long pgoff = offset >> PAGE_SHIFT; |
| 738 | struct prio_tree_iter iter; | ||
| 739 | struct vm_area_struct *vma; | 743 | struct vm_area_struct *vma; |
| 740 | struct map_info *curr = NULL; | 744 | struct map_info *curr = NULL; |
| 741 | struct map_info *prev = NULL; | 745 | struct map_info *prev = NULL; |
| @@ -744,7 +748,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register) | |||
| 744 | 748 | ||
| 745 | again: | 749 | again: |
| 746 | mutex_lock(&mapping->i_mmap_mutex); | 750 | mutex_lock(&mapping->i_mmap_mutex); |
| 747 | vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { | 751 | vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { |
| 748 | if (!valid_vma(vma, is_register)) | 752 | if (!valid_vma(vma, is_register)) |
| 749 | continue; | 753 | continue; |
| 750 | 754 | ||
diff --git a/kernel/exit.c b/kernel/exit.c index f65345f9e5bb..346616c0092c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
| @@ -457,108 +457,13 @@ void daemonize(const char *name, ...) | |||
| 457 | /* Become as one with the init task */ | 457 | /* Become as one with the init task */ |
| 458 | 458 | ||
| 459 | daemonize_fs_struct(); | 459 | daemonize_fs_struct(); |
| 460 | exit_files(current); | 460 | daemonize_descriptors(); |
| 461 | current->files = init_task.files; | ||
| 462 | atomic_inc(¤t->files->count); | ||
| 463 | 461 | ||
| 464 | reparent_to_kthreadd(); | 462 | reparent_to_kthreadd(); |
| 465 | } | 463 | } |
| 466 | 464 | ||
| 467 | EXPORT_SYMBOL(daemonize); | 465 | EXPORT_SYMBOL(daemonize); |
| 468 | 466 | ||
| 469 | static void close_files(struct files_struct * files) | ||
| 470 | { | ||
| 471 | int i, j; | ||
| 472 | struct fdtable *fdt; | ||
| 473 | |||
| 474 | j = 0; | ||
| 475 | |||
| 476 | /* | ||
| 477 | * It is safe to dereference the fd table without RCU or | ||
| 478 | * ->file_lock because this is the last reference to the | ||
| 479 | * files structure. But use RCU to shut RCU-lockdep up. | ||
| 480 | */ | ||
| 481 | rcu_read_lock(); | ||
| 482 | fdt = files_fdtable(files); | ||
| 483 | rcu_read_unlock(); | ||
| 484 | for (;;) { | ||
| 485 | unsigned long set; | ||
| 486 | i = j * BITS_PER_LONG; | ||
| 487 | if (i >= fdt->max_fds) | ||
| 488 | break; | ||
| 489 | set = fdt->open_fds[j++]; | ||
| 490 | while (set) { | ||
| 491 | if (set & 1) { | ||
| 492 | struct file * file = xchg(&fdt->fd[i], NULL); | ||
| 493 | if (file) { | ||
| 494 | filp_close(file, files); | ||
| 495 | cond_resched(); | ||
| 496 | } | ||
| 497 | } | ||
| 498 | i++; | ||
| 499 | set >>= 1; | ||
| 500 | } | ||
| 501 | } | ||
| 502 | } | ||
| 503 | |||
| 504 | struct files_struct *get_files_struct(struct task_struct *task) | ||
| 505 | { | ||
| 506 | struct files_struct *files; | ||
| 507 | |||
| 508 | task_lock(task); | ||
| 509 | files = task->files; | ||
| 510 | if (files) | ||
| 511 | atomic_inc(&files->count); | ||
| 512 | task_unlock(task); | ||
| 513 | |||
| 514 | return files; | ||
| 515 | } | ||
| 516 | |||
| 517 | void put_files_struct(struct files_struct *files) | ||
| 518 | { | ||
| 519 | struct fdtable *fdt; | ||
| 520 | |||
| 521 | if (atomic_dec_and_test(&files->count)) { | ||
| 522 | close_files(files); | ||
| 523 | /* | ||
| 524 | * Free the fd and fdset arrays if we expanded them. | ||
| 525 | * If the fdtable was embedded, pass files for freeing | ||
| 526 | * at the end of the RCU grace period. Otherwise, | ||
| 527 | * you can free files immediately. | ||
| 528 | */ | ||
| 529 | rcu_read_lock(); | ||
| 530 | fdt = files_fdtable(files); | ||
| 531 | if (fdt != &files->fdtab) | ||
| 532 | kmem_cache_free(files_cachep, files); | ||
| 533 | free_fdtable(fdt); | ||
| 534 | rcu_read_unlock(); | ||
| 535 | } | ||
| 536 | } | ||
| 537 | |||
| 538 | void reset_files_struct(struct files_struct *files) | ||
| 539 | { | ||
| 540 | struct task_struct *tsk = current; | ||
| 541 | struct files_struct *old; | ||
| 542 | |||
| 543 | old = tsk->files; | ||
| 544 | task_lock(tsk); | ||
| 545 | tsk->files = files; | ||
| 546 | task_unlock(tsk); | ||
| 547 | put_files_struct(old); | ||
| 548 | } | ||
| 549 | |||
| 550 | void exit_files(struct task_struct *tsk) | ||
| 551 | { | ||
| 552 | struct files_struct * files = tsk->files; | ||
| 553 | |||
| 554 | if (files) { | ||
| 555 | task_lock(tsk); | ||
| 556 | tsk->files = NULL; | ||
| 557 | task_unlock(tsk); | ||
| 558 | put_files_struct(files); | ||
| 559 | } | ||
| 560 | } | ||
| 561 | |||
| 562 | #ifdef CONFIG_MM_OWNER | 467 | #ifdef CONFIG_MM_OWNER |
| 563 | /* | 468 | /* |
| 564 | * A task is exiting. If it owned this mm, find a new owner for the mm. | 469 | * A task is exiting. If it owned this mm, find a new owner for the mm. |
| @@ -1046,6 +951,9 @@ void do_exit(long code) | |||
| 1046 | if (tsk->splice_pipe) | 951 | if (tsk->splice_pipe) |
| 1047 | __free_pipe_info(tsk->splice_pipe); | 952 | __free_pipe_info(tsk->splice_pipe); |
| 1048 | 953 | ||
| 954 | if (tsk->task_frag.page) | ||
| 955 | put_page(tsk->task_frag.page); | ||
| 956 | |||
| 1049 | validate_creds_for_do_exit(tsk); | 957 | validate_creds_for_do_exit(tsk); |
| 1050 | 958 | ||
| 1051 | preempt_disable(); | 959 | preempt_disable(); |
diff --git a/kernel/fork.c b/kernel/fork.c index 5a0e74d89a5a..8b20ab7d3aa2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -330,6 +330,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) | |||
| 330 | tsk->btrace_seq = 0; | 330 | tsk->btrace_seq = 0; |
| 331 | #endif | 331 | #endif |
| 332 | tsk->splice_pipe = NULL; | 332 | tsk->splice_pipe = NULL; |
| 333 | tsk->task_frag.page = NULL; | ||
| 333 | 334 | ||
| 334 | account_kernel_stack(ti, 1); | 335 | account_kernel_stack(ti, 1); |
| 335 | 336 | ||
| @@ -422,7 +423,12 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
| 422 | mapping->i_mmap_writable++; | 423 | mapping->i_mmap_writable++; |
| 423 | flush_dcache_mmap_lock(mapping); | 424 | flush_dcache_mmap_lock(mapping); |
| 424 | /* insert tmp into the share list, just after mpnt */ | 425 | /* insert tmp into the share list, just after mpnt */ |
| 425 | vma_prio_tree_add(tmp, mpnt); | 426 | if (unlikely(tmp->vm_flags & VM_NONLINEAR)) |
| 427 | vma_nonlinear_insert(tmp, | ||
| 428 | &mapping->i_mmap_nonlinear); | ||
| 429 | else | ||
| 430 | vma_interval_tree_insert_after(tmp, mpnt, | ||
| 431 | &mapping->i_mmap); | ||
| 426 | flush_dcache_mmap_unlock(mapping); | 432 | flush_dcache_mmap_unlock(mapping); |
| 427 | mutex_unlock(&mapping->i_mmap_mutex); | 433 | mutex_unlock(&mapping->i_mmap_mutex); |
| 428 | } | 434 | } |
| @@ -621,26 +627,6 @@ void mmput(struct mm_struct *mm) | |||
| 621 | } | 627 | } |
| 622 | EXPORT_SYMBOL_GPL(mmput); | 628 | EXPORT_SYMBOL_GPL(mmput); |
| 623 | 629 | ||
| 624 | /* | ||
| 625 | * We added or removed a vma mapping the executable. The vmas are only mapped | ||
| 626 | * during exec and are not mapped with the mmap system call. | ||
| 627 | * Callers must hold down_write() on the mm's mmap_sem for these | ||
| 628 | */ | ||
| 629 | void added_exe_file_vma(struct mm_struct *mm) | ||
| 630 | { | ||
| 631 | mm->num_exe_file_vmas++; | ||
| 632 | } | ||
| 633 | |||
| 634 | void removed_exe_file_vma(struct mm_struct *mm) | ||
| 635 | { | ||
| 636 | mm->num_exe_file_vmas--; | ||
| 637 | if ((mm->num_exe_file_vmas == 0) && mm->exe_file) { | ||
| 638 | fput(mm->exe_file); | ||
| 639 | mm->exe_file = NULL; | ||
| 640 | } | ||
| 641 | |||
| 642 | } | ||
| 643 | |||
| 644 | void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) | 630 | void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) |
| 645 | { | 631 | { |
| 646 | if (new_exe_file) | 632 | if (new_exe_file) |
| @@ -648,15 +634,13 @@ void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) | |||
| 648 | if (mm->exe_file) | 634 | if (mm->exe_file) |
| 649 | fput(mm->exe_file); | 635 | fput(mm->exe_file); |
| 650 | mm->exe_file = new_exe_file; | 636 | mm->exe_file = new_exe_file; |
| 651 | mm->num_exe_file_vmas = 0; | ||
| 652 | } | 637 | } |
| 653 | 638 | ||
| 654 | struct file *get_mm_exe_file(struct mm_struct *mm) | 639 | struct file *get_mm_exe_file(struct mm_struct *mm) |
| 655 | { | 640 | { |
| 656 | struct file *exe_file; | 641 | struct file *exe_file; |
| 657 | 642 | ||
| 658 | /* We need mmap_sem to protect against races with removal of | 643 | /* We need mmap_sem to protect against races with removal of exe_file */ |
| 659 | * VM_EXECUTABLE vmas */ | ||
| 660 | down_read(&mm->mmap_sem); | 644 | down_read(&mm->mmap_sem); |
| 661 | exe_file = mm->exe_file; | 645 | exe_file = mm->exe_file; |
| 662 | if (exe_file) | 646 | if (exe_file) |
| @@ -1077,7 +1061,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
| 1077 | init_rwsem(&sig->group_rwsem); | 1061 | init_rwsem(&sig->group_rwsem); |
| 1078 | #endif | 1062 | #endif |
| 1079 | 1063 | ||
| 1080 | sig->oom_adj = current->signal->oom_adj; | ||
| 1081 | sig->oom_score_adj = current->signal->oom_score_adj; | 1064 | sig->oom_score_adj = current->signal->oom_score_adj; |
| 1082 | sig->oom_score_adj_min = current->signal->oom_score_adj_min; | 1065 | sig->oom_score_adj_min = current->signal->oom_score_adj_min; |
| 1083 | 1066 | ||
| @@ -1601,7 +1584,7 @@ long do_fork(unsigned long clone_flags, | |||
| 1601 | * requested, no event is reported; otherwise, report if the event | 1584 | * requested, no event is reported; otherwise, report if the event |
| 1602 | * for the type of forking is enabled. | 1585 | * for the type of forking is enabled. |
| 1603 | */ | 1586 | */ |
| 1604 | if (likely(user_mode(regs)) && !(clone_flags & CLONE_UNTRACED)) { | 1587 | if (!(clone_flags & CLONE_UNTRACED) && likely(user_mode(regs))) { |
| 1605 | if (clone_flags & CLONE_VFORK) | 1588 | if (clone_flags & CLONE_VFORK) |
| 1606 | trace = PTRACE_EVENT_VFORK; | 1589 | trace = PTRACE_EVENT_VFORK; |
| 1607 | else if ((clone_flags & CSIGNAL) != SIGCHLD) | 1590 | else if ((clone_flags & CSIGNAL) != SIGCHLD) |
| @@ -1651,6 +1634,17 @@ long do_fork(unsigned long clone_flags, | |||
| 1651 | return nr; | 1634 | return nr; |
| 1652 | } | 1635 | } |
| 1653 | 1636 | ||
| 1637 | #ifdef CONFIG_GENERIC_KERNEL_THREAD | ||
| 1638 | /* | ||
| 1639 | * Create a kernel thread. | ||
| 1640 | */ | ||
| 1641 | pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) | ||
| 1642 | { | ||
| 1643 | return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, NULL, | ||
| 1644 | (unsigned long)arg, NULL, NULL); | ||
| 1645 | } | ||
| 1646 | #endif | ||
| 1647 | |||
| 1654 | #ifndef ARCH_MIN_MMSTRUCT_ALIGN | 1648 | #ifndef ARCH_MIN_MMSTRUCT_ALIGN |
| 1655 | #define ARCH_MIN_MMSTRUCT_ALIGN 0 | 1649 | #define ARCH_MIN_MMSTRUCT_ALIGN 0 |
| 1656 | #endif | 1650 | #endif |
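The CONFIG_GENERIC_KERNEL_THREAD hunk above builds kernel_thread() out of do_fork() with CLONE_VM | CLONE_UNTRACED, i.e. "run fn(arg) in a new task that shares the caller's address space". A rough userspace analogue of that flag-driven pattern is clone(2); the sketch below is only an illustration under that analogy, not kernel code, and the stack size and worker function are invented.

/* Userspace sketch: run worker(&value) in a task sharing our VM. */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>

static int worker(void *arg)
{
        printf("child sees value %d\n", *(int *)arg);
        return 0;
}

int main(void)
{
        int value = 42;
        char *stack = malloc(64 * 1024);
        if (!stack)
                return 1;
        /* CLONE_VM: share the address space, much as a kernel thread does. */
        int pid = clone(worker, stack + 64 * 1024, CLONE_VM | SIGCHLD, &value);
        if (pid < 0)
                return 1;
        waitpid(pid, NULL, 0);
        free(stack);
        return 0;
}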
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 49a77727db42..4e69e24d3d7d 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c | |||
| @@ -148,7 +148,8 @@ static unsigned int irq_domain_legacy_revmap(struct irq_domain *domain, | |||
| 148 | * @host_data: Controller private data pointer | 148 | * @host_data: Controller private data pointer |
| 149 | * | 149 | * |
| 150 | * Allocates a legacy irq_domain if irq_base is positive or a linear | 150 | * Allocates a legacy irq_domain if irq_base is positive or a linear |
| 151 | * domain otherwise. | 151 | * domain otherwise. For the legacy domain, IRQ descriptors will also |
| 152 | * be allocated. | ||
| 152 | * | 153 | * |
| 153 | * This is intended to implement the expected behaviour for most | 154 | * This is intended to implement the expected behaviour for most |
| 154 | * interrupt controllers which is that a linear mapping should | 155 | * interrupt controllers which is that a linear mapping should |
| @@ -162,11 +163,33 @@ struct irq_domain *irq_domain_add_simple(struct device_node *of_node, | |||
| 162 | const struct irq_domain_ops *ops, | 163 | const struct irq_domain_ops *ops, |
| 163 | void *host_data) | 164 | void *host_data) |
| 164 | { | 165 | { |
| 165 | if (first_irq > 0) | 166 | if (first_irq > 0) { |
| 166 | return irq_domain_add_legacy(of_node, size, first_irq, 0, | 167 | int irq_base; |
| 168 | |||
| 169 | if (IS_ENABLED(CONFIG_SPARSE_IRQ)) { | ||
| 170 | /* | ||
| 171 | * Set the descriptor allocator to search for a | ||
| 172 | * 1-to-1 mapping, such as irq_alloc_desc_at(). | ||
| 173 | * Use of_node_to_nid() which is defined to | ||
| 174 | * numa_node_id() on platforms that have no custom | ||
| 175 | * implementation. | ||
| 176 | */ | ||
| 177 | irq_base = irq_alloc_descs(first_irq, first_irq, size, | ||
| 178 | of_node_to_nid(of_node)); | ||
| 179 | if (irq_base < 0) { | ||
| 180 | WARN(1, "Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n", | ||
| 181 | first_irq); | ||
| 182 | irq_base = first_irq; | ||
| 183 | } | ||
| 184 | } else | ||
| 185 | irq_base = first_irq; | ||
| 186 | |||
| 187 | return irq_domain_add_legacy(of_node, size, irq_base, 0, | ||
| 167 | ops, host_data); | 188 | ops, host_data); |
| 168 | else | 189 | } |
| 169 | return irq_domain_add_linear(of_node, size, ops, host_data); | 190 | |
| 191 | /* A linear domain is the default */ | ||
| 192 | return irq_domain_add_linear(of_node, size, ops, host_data); | ||
| 170 | } | 193 | } |
| 171 | 194 | ||
| 172 | /** | 195 | /** |
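The reworked irq_domain_add_simple() above first tries to reserve descriptors 1:1 at the requested base when SPARSE_IRQ is enabled, and otherwise falls back to assuming they are pre-allocated. A minimal userspace sketch of that reserve-or-fall-back decision, with an invented bitmap allocator standing in for irq_alloc_descs():

#include <stdbool.h>
#include <stdio.h>

#define NR_IDS 64
static bool id_used[NR_IDS];

/* Invented helper: reserve [base, base+count) or fail without side effects. */
static int reserve_range(int base, int count)
{
        for (int i = base; i < base + count; i++)
                if (i >= NR_IDS || id_used[i])
                        return -1;
        for (int i = base; i < base + count; i++)
                id_used[i] = true;
        return base;
}

int main(void)
{
        int first_irq = 16, size = 4;
        int irq_base = reserve_range(first_irq, size);
        if (irq_base < 0) {
                fprintf(stderr, "cannot reserve, assuming pre-allocated\n");
                irq_base = first_irq;           /* fall back, as the hunk does */
        }
        printf("using irq_base %d\n", irq_base);
        return 0;
}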
diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 43049192b5ec..60f48fa0fd0d 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c | |||
| @@ -118,6 +118,7 @@ void jump_label_rate_limit(struct static_key_deferred *key, | |||
| 118 | key->timeout = rl; | 118 | key->timeout = rl; |
| 119 | INIT_DELAYED_WORK(&key->work, jump_label_update_timeout); | 119 | INIT_DELAYED_WORK(&key->work, jump_label_update_timeout); |
| 120 | } | 120 | } |
| 121 | EXPORT_SYMBOL_GPL(jump_label_rate_limit); | ||
| 121 | 122 | ||
| 122 | static int addr_conflict(struct jump_entry *entry, void *start, void *end) | 123 | static int addr_conflict(struct jump_entry *entry, void *start, void *end) |
| 123 | { | 124 | { |
diff --git a/kernel/kexec.c b/kernel/kexec.c index 0668d58d6413..5e4bd7864c5d 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
| @@ -21,7 +21,6 @@ | |||
| 21 | #include <linux/hardirq.h> | 21 | #include <linux/hardirq.h> |
| 22 | #include <linux/elf.h> | 22 | #include <linux/elf.h> |
| 23 | #include <linux/elfcore.h> | 23 | #include <linux/elfcore.h> |
| 24 | #include <generated/utsrelease.h> | ||
| 25 | #include <linux/utsname.h> | 24 | #include <linux/utsname.h> |
| 26 | #include <linux/numa.h> | 25 | #include <linux/numa.h> |
| 27 | #include <linux/suspend.h> | 26 | #include <linux/suspend.h> |
diff --git a/kernel/kmod.c b/kernel/kmod.c index 6f99aead66c6..1c317e386831 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c | |||
| @@ -37,6 +37,7 @@ | |||
| 37 | #include <linux/notifier.h> | 37 | #include <linux/notifier.h> |
| 38 | #include <linux/suspend.h> | 38 | #include <linux/suspend.h> |
| 39 | #include <linux/rwsem.h> | 39 | #include <linux/rwsem.h> |
| 40 | #include <linux/ptrace.h> | ||
| 40 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
| 41 | 42 | ||
| 42 | #include <trace/events/module.h> | 43 | #include <trace/events/module.h> |
| @@ -221,11 +222,13 @@ static int ____call_usermodehelper(void *data) | |||
| 221 | retval = kernel_execve(sub_info->path, | 222 | retval = kernel_execve(sub_info->path, |
| 222 | (const char *const *)sub_info->argv, | 223 | (const char *const *)sub_info->argv, |
| 223 | (const char *const *)sub_info->envp); | 224 | (const char *const *)sub_info->envp); |
| 225 | if (!retval) | ||
| 226 | return 0; | ||
| 224 | 227 | ||
| 225 | /* Exec failed? */ | 228 | /* Exec failed? */ |
| 226 | fail: | 229 | fail: |
| 227 | sub_info->retval = retval; | 230 | sub_info->retval = retval; |
| 228 | return 0; | 231 | do_exit(0); |
| 229 | } | 232 | } |
| 230 | 233 | ||
| 231 | static int call_helper(void *data) | 234 | static int call_helper(void *data) |
| @@ -292,7 +295,7 @@ static int wait_for_helper(void *data) | |||
| 292 | } | 295 | } |
| 293 | 296 | ||
| 294 | umh_complete(sub_info); | 297 | umh_complete(sub_info); |
| 295 | return 0; | 298 | do_exit(0); |
| 296 | } | 299 | } |
| 297 | 300 | ||
| 298 | /* This is run by khelper thread */ | 301 | /* This is run by khelper thread */ |
diff --git a/kernel/kthread.c b/kernel/kthread.c index 146a6fa96825..29fb60caecb5 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #include <linux/mutex.h> | 16 | #include <linux/mutex.h> |
| 17 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
| 18 | #include <linux/freezer.h> | 18 | #include <linux/freezer.h> |
| 19 | #include <linux/ptrace.h> | ||
| 19 | #include <trace/events/sched.h> | 20 | #include <trace/events/sched.h> |
| 20 | 21 | ||
| 21 | static DEFINE_SPINLOCK(kthread_create_lock); | 22 | static DEFINE_SPINLOCK(kthread_create_lock); |
diff --git a/kernel/modsign_pubkey.c b/kernel/modsign_pubkey.c new file mode 100644 index 000000000000..4646eb2c3820 --- /dev/null +++ b/kernel/modsign_pubkey.c | |||
| @@ -0,0 +1,113 @@ | |||
| 1 | /* Public keys for module signature verification | ||
| 2 | * | ||
| 3 | * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. | ||
| 4 | * Written by David Howells (dhowells@redhat.com) | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU General Public Licence | ||
| 8 | * as published by the Free Software Foundation; either version | ||
| 9 | * 2 of the Licence, or (at your option) any later version. | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include <linux/kernel.h> | ||
| 13 | #include <linux/sched.h> | ||
| 14 | #include <linux/cred.h> | ||
| 15 | #include <linux/err.h> | ||
| 16 | #include <keys/asymmetric-type.h> | ||
| 17 | #include "module-internal.h" | ||
| 18 | |||
| 19 | struct key *modsign_keyring; | ||
| 20 | |||
| 21 | extern __initdata const u8 modsign_certificate_list[]; | ||
| 22 | extern __initdata const u8 modsign_certificate_list_end[]; | ||
| 23 | asm(".section .init.data,\"aw\"\n" | ||
| 24 | "modsign_certificate_list:\n" | ||
| 25 | ".incbin \"signing_key.x509\"\n" | ||
| 26 | ".incbin \"extra_certificates\"\n" | ||
| 27 | "modsign_certificate_list_end:" | ||
| 28 | ); | ||
| 29 | |||
| 30 | /* | ||
| 31 | * We need to make sure ccache doesn't cache the .o file as it doesn't notice | ||
| 32 | * if modsign.pub changes. | ||
| 33 | */ | ||
| 34 | static __initdata const char annoy_ccache[] = __TIME__ "foo"; | ||
| 35 | |||
| 36 | /* | ||
| 37 | * Load the compiled-in keys | ||
| 38 | */ | ||
| 39 | static __init int module_verify_init(void) | ||
| 40 | { | ||
| 41 | pr_notice("Initialise module verification\n"); | ||
| 42 | |||
| 43 | modsign_keyring = key_alloc(&key_type_keyring, ".module_sign", | ||
| 44 | KUIDT_INIT(0), KGIDT_INIT(0), | ||
| 45 | current_cred(), | ||
| 46 | (KEY_POS_ALL & ~KEY_POS_SETATTR) | | ||
| 47 | KEY_USR_VIEW | KEY_USR_READ, | ||
| 48 | KEY_ALLOC_NOT_IN_QUOTA); | ||
| 49 | if (IS_ERR(modsign_keyring)) | ||
| 50 | panic("Can't allocate module signing keyring\n"); | ||
| 51 | |||
| 52 | if (key_instantiate_and_link(modsign_keyring, NULL, 0, NULL, NULL) < 0) | ||
| 53 | panic("Can't instantiate module signing keyring\n"); | ||
| 54 | |||
| 55 | return 0; | ||
| 56 | } | ||
| 57 | |||
| 58 | /* | ||
| 59 | * Must be initialised before we try and load the keys into the keyring. | ||
| 60 | */ | ||
| 61 | device_initcall(module_verify_init); | ||
| 62 | |||
| 63 | /* | ||
| 64 | * Load the compiled-in keys | ||
| 65 | */ | ||
| 66 | static __init int load_module_signing_keys(void) | ||
| 67 | { | ||
| 68 | key_ref_t key; | ||
| 69 | const u8 *p, *end; | ||
| 70 | size_t plen; | ||
| 71 | |||
| 72 | pr_notice("Loading module verification certificates\n"); | ||
| 73 | |||
| 74 | end = modsign_certificate_list_end; | ||
| 75 | p = modsign_certificate_list; | ||
| 76 | while (p < end) { | ||
| 77 | /* Each cert begins with an ASN.1 SEQUENCE tag and must be more | ||
| 78 | * than 256 bytes in size. | ||
| 79 | */ | ||
| 80 | if (end - p < 4) | ||
| 81 | goto dodgy_cert; | ||
| 82 | if (p[0] != 0x30 && | ||
| 83 | p[1] != 0x82) | ||
| 84 | goto dodgy_cert; | ||
| 85 | plen = (p[2] << 8) | p[3]; | ||
| 86 | plen += 4; | ||
| 87 | if (plen > end - p) | ||
| 88 | goto dodgy_cert; | ||
| 89 | |||
| 90 | key = key_create_or_update(make_key_ref(modsign_keyring, 1), | ||
| 91 | "asymmetric", | ||
| 92 | NULL, | ||
| 93 | p, | ||
| 94 | plen, | ||
| 95 | (KEY_POS_ALL & ~KEY_POS_SETATTR) | | ||
| 96 | KEY_USR_VIEW, | ||
| 97 | KEY_ALLOC_NOT_IN_QUOTA); | ||
| 98 | if (IS_ERR(key)) | ||
| 99 | pr_err("MODSIGN: Problem loading in-kernel X.509 certificate (%ld)\n", | ||
| 100 | PTR_ERR(key)); | ||
| 101 | else | ||
| 102 | pr_notice("MODSIGN: Loaded cert '%s'\n", | ||
| 103 | key_ref_to_ptr(key)->description); | ||
| 104 | p += plen; | ||
| 105 | } | ||
| 106 | |||
| 107 | return 0; | ||
| 108 | |||
| 109 | dodgy_cert: | ||
| 110 | pr_err("MODSIGN: Problem parsing in-kernel X.509 certificate list\n"); | ||
| 111 | return 0; | ||
| 112 | } | ||
| 113 | late_initcall(load_module_signing_keys); | ||
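The certificate walk in load_module_signing_keys() sizes each blob from its DER header: a SEQUENCE tag (0x30) with the two-byte length form (0x82), followed by a big-endian 16-bit length, plus the four header bytes themselves. A hedged, standalone sketch of that length computation on fabricated bytes:

#include <stdio.h>
#include <stddef.h>

int main(void)
{
        /* 0x30 0x82 0x01 0x23 => body of 0x123 bytes, 0x127 bytes in total */
        const unsigned char hdr[4] = { 0x30, 0x82, 0x01, 0x23 };
        size_t plen;

        if (hdr[0] != 0x30 || hdr[1] != 0x82) {
                fprintf(stderr, "not a SEQUENCE with a 2-byte length\n");
                return 1;
        }
        plen = ((size_t)hdr[2] << 8) | hdr[3];
        plen += 4;                              /* include the header itself */
        printf("this certificate occupies %zu bytes\n", plen);
        return 0;
}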
diff --git a/kernel/module-internal.h b/kernel/module-internal.h new file mode 100644 index 000000000000..6114a13419bd --- /dev/null +++ b/kernel/module-internal.h | |||
| @@ -0,0 +1,15 @@ | |||
| 1 | /* Module internals | ||
| 2 | * | ||
| 3 | * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. | ||
| 4 | * Written by David Howells (dhowells@redhat.com) | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU General Public Licence | ||
| 8 | * as published by the Free Software Foundation; either version | ||
| 9 | * 2 of the Licence, or (at your option) any later version. | ||
| 10 | */ | ||
| 11 | |||
| 12 | extern struct key *modsign_keyring; | ||
| 13 | |||
| 14 | extern int mod_verify_sig(const void *mod, unsigned long modlen, | ||
| 15 | const void *sig, unsigned long siglen); | ||
diff --git a/kernel/module.c b/kernel/module.c index 4edbd9c11aca..0e2da8695f8e 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
| @@ -58,6 +58,8 @@ | |||
| 58 | #include <linux/jump_label.h> | 58 | #include <linux/jump_label.h> |
| 59 | #include <linux/pfn.h> | 59 | #include <linux/pfn.h> |
| 60 | #include <linux/bsearch.h> | 60 | #include <linux/bsearch.h> |
| 61 | #include <linux/fips.h> | ||
| 62 | #include "module-internal.h" | ||
| 61 | 63 | ||
| 62 | #define CREATE_TRACE_POINTS | 64 | #define CREATE_TRACE_POINTS |
| 63 | #include <trace/events/module.h> | 65 | #include <trace/events/module.h> |
| @@ -102,6 +104,43 @@ static LIST_HEAD(modules); | |||
| 102 | struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */ | 104 | struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */ |
| 103 | #endif /* CONFIG_KGDB_KDB */ | 105 | #endif /* CONFIG_KGDB_KDB */ |
| 104 | 106 | ||
| 107 | #ifdef CONFIG_MODULE_SIG | ||
| 108 | #ifdef CONFIG_MODULE_SIG_FORCE | ||
| 109 | static bool sig_enforce = true; | ||
| 110 | #else | ||
| 111 | static bool sig_enforce = false; | ||
| 112 | |||
| 113 | static int param_set_bool_enable_only(const char *val, | ||
| 114 | const struct kernel_param *kp) | ||
| 115 | { | ||
| 116 | int err; | ||
| 117 | bool test; | ||
| 118 | struct kernel_param dummy_kp = *kp; | ||
| 119 | |||
| 120 | dummy_kp.arg = &test; | ||
| 121 | |||
| 122 | err = param_set_bool(val, &dummy_kp); | ||
| 123 | if (err) | ||
| 124 | return err; | ||
| 125 | |||
| 126 | /* Don't let them unset it once it's set! */ | ||
| 127 | if (!test && sig_enforce) | ||
| 128 | return -EROFS; | ||
| 129 | |||
| 130 | if (test) | ||
| 131 | sig_enforce = true; | ||
| 132 | return 0; | ||
| 133 | } | ||
| 134 | |||
| 135 | static const struct kernel_param_ops param_ops_bool_enable_only = { | ||
| 136 | .set = param_set_bool_enable_only, | ||
| 137 | .get = param_get_bool, | ||
| 138 | }; | ||
| 139 | #define param_check_bool_enable_only param_check_bool | ||
| 140 | |||
| 141 | module_param(sig_enforce, bool_enable_only, 0644); | ||
| 142 | #endif /* !CONFIG_MODULE_SIG_FORCE */ | ||
| 143 | #endif /* CONFIG_MODULE_SIG */ | ||
| 105 | 144 | ||
| 106 | /* Block module loading/unloading? */ | 145 | /* Block module loading/unloading? */ |
| 107 | int modules_disabled = 0; | 146 | int modules_disabled = 0; |
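param_set_bool_enable_only() above implements a write-once switch: sig_enforce can be turned on at runtime, but any attempt to clear it again returns -EROFS. A minimal userspace sketch of the same enable-only behaviour; parse_bool() is an invented stand-in for the kernel's param_set_bool():

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool sig_enforce;

static int parse_bool(const char *val, bool *res)
{
        if (!strcmp(val, "1") || !strcmp(val, "y")) { *res = true;  return 0; }
        if (!strcmp(val, "0") || !strcmp(val, "n")) { *res = false; return 0; }
        return -EINVAL;
}

static int set_bool_enable_only(const char *val)
{
        bool test;
        int err = parse_bool(val, &test);

        if (err)
                return err;
        if (!test && sig_enforce)
                return -EROFS;          /* refuse to turn it back off */
        if (test)
                sig_enforce = true;
        return 0;
}

int main(void)
{
        printf("%d\n", set_bool_enable_only("1"));   /* 0: enabled          */
        printf("%d\n", set_bool_enable_only("0"));   /* -EROFS: stays on    */
        return 0;
}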
| @@ -136,6 +175,7 @@ struct load_info { | |||
| 136 | unsigned long symoffs, stroffs; | 175 | unsigned long symoffs, stroffs; |
| 137 | struct _ddebug *debug; | 176 | struct _ddebug *debug; |
| 138 | unsigned int num_debug; | 177 | unsigned int num_debug; |
| 178 | bool sig_ok; | ||
| 139 | struct { | 179 | struct { |
| 140 | unsigned int sym, str, mod, vers, info, pcpu; | 180 | unsigned int sym, str, mod, vers, info, pcpu; |
| 141 | } index; | 181 | } index; |
| @@ -1949,26 +1989,6 @@ static int simplify_symbols(struct module *mod, const struct load_info *info) | |||
| 1949 | return ret; | 1989 | return ret; |
| 1950 | } | 1990 | } |
| 1951 | 1991 | ||
| 1952 | int __weak apply_relocate(Elf_Shdr *sechdrs, | ||
| 1953 | const char *strtab, | ||
| 1954 | unsigned int symindex, | ||
| 1955 | unsigned int relsec, | ||
| 1956 | struct module *me) | ||
| 1957 | { | ||
| 1958 | pr_err("module %s: REL relocation unsupported\n", me->name); | ||
| 1959 | return -ENOEXEC; | ||
| 1960 | } | ||
| 1961 | |||
| 1962 | int __weak apply_relocate_add(Elf_Shdr *sechdrs, | ||
| 1963 | const char *strtab, | ||
| 1964 | unsigned int symindex, | ||
| 1965 | unsigned int relsec, | ||
| 1966 | struct module *me) | ||
| 1967 | { | ||
| 1968 | pr_err("module %s: RELA relocation unsupported\n", me->name); | ||
| 1969 | return -ENOEXEC; | ||
| 1970 | } | ||
| 1971 | |||
| 1972 | static int apply_relocations(struct module *mod, const struct load_info *info) | 1992 | static int apply_relocations(struct module *mod, const struct load_info *info) |
| 1973 | { | 1993 | { |
| 1974 | unsigned int i; | 1994 | unsigned int i; |
| @@ -2399,7 +2419,52 @@ static inline void kmemleak_load_module(const struct module *mod, | |||
| 2399 | } | 2419 | } |
| 2400 | #endif | 2420 | #endif |
| 2401 | 2421 | ||
| 2402 | /* Sets info->hdr and info->len. */ | 2422 | #ifdef CONFIG_MODULE_SIG |
| 2423 | static int module_sig_check(struct load_info *info, | ||
| 2424 | const void *mod, unsigned long *len) | ||
| 2425 | { | ||
| 2426 | int err = -ENOKEY; | ||
| 2427 | const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1; | ||
| 2428 | const void *p = mod, *end = mod + *len; | ||
| 2429 | |||
| 2430 | /* Poor man's memmem. */ | ||
| 2431 | while ((p = memchr(p, MODULE_SIG_STRING[0], end - p))) { | ||
| 2432 | if (p + markerlen > end) | ||
| 2433 | break; | ||
| 2434 | |||
| 2435 | if (memcmp(p, MODULE_SIG_STRING, markerlen) == 0) { | ||
| 2436 | const void *sig = p + markerlen; | ||
| 2437 | /* Truncate module up to signature. */ | ||
| 2438 | *len = p - mod; | ||
| 2439 | err = mod_verify_sig(mod, *len, sig, end - sig); | ||
| 2440 | break; | ||
| 2441 | } | ||
| 2442 | p++; | ||
| 2443 | } | ||
| 2444 | |||
| 2445 | if (!err) { | ||
| 2446 | info->sig_ok = true; | ||
| 2447 | return 0; | ||
| 2448 | } | ||
| 2449 | |||
| 2450 | /* Not having a signature is only an error if we're strict. */ | ||
| 2451 | if (err < 0 && fips_enabled) | ||
| 2452 | panic("Module verification failed with error %d in FIPS mode\n", | ||
| 2453 | err); | ||
| 2454 | if (err == -ENOKEY && !sig_enforce) | ||
| 2455 | err = 0; | ||
| 2456 | |||
| 2457 | return err; | ||
| 2458 | } | ||
| 2459 | #else /* !CONFIG_MODULE_SIG */ | ||
| 2460 | static int module_sig_check(struct load_info *info, | ||
| 2461 | void *mod, unsigned long *len) | ||
| 2462 | { | ||
| 2463 | return 0; | ||
| 2464 | } | ||
| 2465 | #endif /* !CONFIG_MODULE_SIG */ | ||
| 2466 | |||
| 2467 | /* Sets info->hdr, info->len and info->sig_ok. */ | ||
| 2403 | static int copy_and_check(struct load_info *info, | 2468 | static int copy_and_check(struct load_info *info, |
| 2404 | const void __user *umod, unsigned long len, | 2469 | const void __user *umod, unsigned long len, |
| 2405 | const char __user *uargs) | 2470 | const char __user *uargs) |
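module_sig_check() above scans the loaded image for a signature marker ("poor man's memmem"), truncates the module to the bytes before the marker, and verifies what follows. A standalone sketch of that scan on a fabricated buffer; the marker text here is assumed to match MODULE_SIG_STRING:

#include <stdio.h>
#include <string.h>

static const char marker[] = "~Module signature appended~\n";

int main(void)
{
        unsigned char mod[] =
                "ELF...module bytes...~Module signature appended~\nSIGDATA";
        size_t len = sizeof(mod) - 1, markerlen = sizeof(marker) - 1;
        const unsigned char *p = mod, *end = mod + len, *sig = NULL;

        while ((p = memchr(p, marker[0], end - p))) {
                if ((size_t)(end - p) < markerlen)
                        break;
                if (memcmp(p, marker, markerlen) == 0) {
                        sig = p + markerlen;    /* signature starts here      */
                        len = p - mod;          /* truncate module at marker  */
                        break;
                }
                p++;
        }
        if (sig)
                printf("module is %zu bytes, signature is %zu bytes\n",
                       len, (size_t)(end - sig));
        return 0;
}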
| @@ -2419,6 +2484,10 @@ static int copy_and_check(struct load_info *info, | |||
| 2419 | goto free_hdr; | 2484 | goto free_hdr; |
| 2420 | } | 2485 | } |
| 2421 | 2486 | ||
| 2487 | err = module_sig_check(info, hdr, &len); | ||
| 2488 | if (err) | ||
| 2489 | goto free_hdr; | ||
| 2490 | |||
| 2422 | /* Sanity checks against insmoding binaries or wrong arch, | 2491 | /* Sanity checks against insmoding binaries or wrong arch, |
| 2423 | weird elf version */ | 2492 | weird elf version */ |
| 2424 | if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0 | 2493 | if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0 |
| @@ -2730,6 +2799,10 @@ static int check_module_license_and_versions(struct module *mod) | |||
| 2730 | if (strcmp(mod->name, "driverloader") == 0) | 2799 | if (strcmp(mod->name, "driverloader") == 0) |
| 2731 | add_taint_module(mod, TAINT_PROPRIETARY_MODULE); | 2800 | add_taint_module(mod, TAINT_PROPRIETARY_MODULE); |
| 2732 | 2801 | ||
| 2802 | /* lve claims to be GPL but upstream won't provide source */ | ||
| 2803 | if (strcmp(mod->name, "lve") == 0) | ||
| 2804 | add_taint_module(mod, TAINT_PROPRIETARY_MODULE); | ||
| 2805 | |||
| 2733 | #ifdef CONFIG_MODVERSIONS | 2806 | #ifdef CONFIG_MODVERSIONS |
| 2734 | if ((mod->num_syms && !mod->crcs) | 2807 | if ((mod->num_syms && !mod->crcs) |
| 2735 | || (mod->num_gpl_syms && !mod->gpl_crcs) | 2808 | || (mod->num_gpl_syms && !mod->gpl_crcs) |
| @@ -2861,6 +2934,20 @@ static int post_relocation(struct module *mod, const struct load_info *info) | |||
| 2861 | return module_finalize(info->hdr, info->sechdrs, mod); | 2934 | return module_finalize(info->hdr, info->sechdrs, mod); |
| 2862 | } | 2935 | } |
| 2863 | 2936 | ||
| 2937 | /* Is this module of this name done loading? No locks held. */ | ||
| 2938 | static bool finished_loading(const char *name) | ||
| 2939 | { | ||
| 2940 | struct module *mod; | ||
| 2941 | bool ret; | ||
| 2942 | |||
| 2943 | mutex_lock(&module_mutex); | ||
| 2944 | mod = find_module(name); | ||
| 2945 | ret = !mod || mod->state != MODULE_STATE_COMING; | ||
| 2946 | mutex_unlock(&module_mutex); | ||
| 2947 | |||
| 2948 | return ret; | ||
| 2949 | } | ||
| 2950 | |||
| 2864 | /* Allocate and load the module: note that size of section 0 is always | 2951 | /* Allocate and load the module: note that size of section 0 is always |
| 2865 | zero, and we rely on this for optional sections. */ | 2952 | zero, and we rely on this for optional sections. */ |
| 2866 | static struct module *load_module(void __user *umod, | 2953 | static struct module *load_module(void __user *umod, |
| @@ -2868,7 +2955,7 @@ static struct module *load_module(void __user *umod, | |||
| 2868 | const char __user *uargs) | 2955 | const char __user *uargs) |
| 2869 | { | 2956 | { |
| 2870 | struct load_info info = { NULL, }; | 2957 | struct load_info info = { NULL, }; |
| 2871 | struct module *mod; | 2958 | struct module *mod, *old; |
| 2872 | long err; | 2959 | long err; |
| 2873 | 2960 | ||
| 2874 | pr_debug("load_module: umod=%p, len=%lu, uargs=%p\n", | 2961 | pr_debug("load_module: umod=%p, len=%lu, uargs=%p\n", |
| @@ -2886,6 +2973,12 @@ static struct module *load_module(void __user *umod, | |||
| 2886 | goto free_copy; | 2973 | goto free_copy; |
| 2887 | } | 2974 | } |
| 2888 | 2975 | ||
| 2976 | #ifdef CONFIG_MODULE_SIG | ||
| 2977 | mod->sig_ok = info.sig_ok; | ||
| 2978 | if (!mod->sig_ok) | ||
| 2979 | add_taint_module(mod, TAINT_FORCED_MODULE); | ||
| 2980 | #endif | ||
| 2981 | |||
| 2889 | /* Now module is in final location, initialize linked lists, etc. */ | 2982 | /* Now module is in final location, initialize linked lists, etc. */ |
| 2890 | err = module_unload_init(mod); | 2983 | err = module_unload_init(mod); |
| 2891 | if (err) | 2984 | if (err) |
| @@ -2934,8 +3027,18 @@ static struct module *load_module(void __user *umod, | |||
| 2934 | * function to insert in a way safe to concurrent readers. | 3027 | * function to insert in a way safe to concurrent readers. |
| 2935 | * The mutex protects against concurrent writers. | 3028 | * The mutex protects against concurrent writers. |
| 2936 | */ | 3029 | */ |
| 3030 | again: | ||
| 2937 | mutex_lock(&module_mutex); | 3031 | mutex_lock(&module_mutex); |
| 2938 | if (find_module(mod->name)) { | 3032 | if ((old = find_module(mod->name)) != NULL) { |
| 3033 | if (old->state == MODULE_STATE_COMING) { | ||
| 3034 | /* Wait in case it fails to load. */ | ||
| 3035 | mutex_unlock(&module_mutex); | ||
| 3036 | err = wait_event_interruptible(module_wq, | ||
| 3037 | finished_loading(mod->name)); | ||
| 3038 | if (err) | ||
| 3039 | goto free_arch_cleanup; | ||
| 3040 | goto again; | ||
| 3041 | } | ||
| 2939 | err = -EEXIST; | 3042 | err = -EEXIST; |
| 2940 | goto unlock; | 3043 | goto unlock; |
| 2941 | } | 3044 | } |
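The again: loop above changes duplicate loads from an immediate -EEXIST to wait-then-retry: a second loader sleeps on module_wq until the first attempt is no longer COMING. A hedged pthread sketch of the same wait-for-predicate-then-retry shape, with invented state names:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

enum state { ABSENT, COMING, LIVE };

static enum state mod_state = COMING;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  wq   = PTHREAD_COND_INITIALIZER;

static bool finished_loading(void)
{
        /* done when the module is absent or no longer COMING */
        return mod_state != COMING;
}

static void *second_loader(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&lock);
        while (!finished_loading())
                pthread_cond_wait(&wq, &lock);      /* like sleeping on module_wq */
        printf("first load finished (state %d), retrying insert\n", mod_state);
        pthread_mutex_unlock(&lock);
        return NULL;
}

int main(void)
{
        pthread_t t;
        pthread_create(&t, NULL, second_loader, NULL);
        sleep(1);                                   /* first load "runs"     */
        pthread_mutex_lock(&lock);
        mod_state = LIVE;                           /* its init succeeded    */
        pthread_cond_broadcast(&wq);                /* wake_up_all()         */
        pthread_mutex_unlock(&lock);
        pthread_join(t, NULL);
        return 0;
}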
| @@ -2975,7 +3078,7 @@ static struct module *load_module(void __user *umod, | |||
| 2975 | /* Unlink carefully: kallsyms could be walking list. */ | 3078 | /* Unlink carefully: kallsyms could be walking list. */ |
| 2976 | list_del_rcu(&mod->list); | 3079 | list_del_rcu(&mod->list); |
| 2977 | module_bug_cleanup(mod); | 3080 | module_bug_cleanup(mod); |
| 2978 | 3081 | wake_up_all(&module_wq); | |
| 2979 | ddebug: | 3082 | ddebug: |
| 2980 | dynamic_debug_remove(info.debug); | 3083 | dynamic_debug_remove(info.debug); |
| 2981 | unlock: | 3084 | unlock: |
| @@ -3050,7 +3153,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, | |||
| 3050 | blocking_notifier_call_chain(&module_notify_list, | 3153 | blocking_notifier_call_chain(&module_notify_list, |
| 3051 | MODULE_STATE_GOING, mod); | 3154 | MODULE_STATE_GOING, mod); |
| 3052 | free_module(mod); | 3155 | free_module(mod); |
| 3053 | wake_up(&module_wq); | 3156 | wake_up_all(&module_wq); |
| 3054 | return ret; | 3157 | return ret; |
| 3055 | } | 3158 | } |
| 3056 | if (ret > 0) { | 3159 | if (ret > 0) { |
| @@ -3062,9 +3165,8 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, | |||
| 3062 | dump_stack(); | 3165 | dump_stack(); |
| 3063 | } | 3166 | } |
| 3064 | 3167 | ||
| 3065 | /* Now it's a first class citizen! Wake up anyone waiting for it. */ | 3168 | /* Now it's a first class citizen! */ |
| 3066 | mod->state = MODULE_STATE_LIVE; | 3169 | mod->state = MODULE_STATE_LIVE; |
| 3067 | wake_up(&module_wq); | ||
| 3068 | blocking_notifier_call_chain(&module_notify_list, | 3170 | blocking_notifier_call_chain(&module_notify_list, |
| 3069 | MODULE_STATE_LIVE, mod); | 3171 | MODULE_STATE_LIVE, mod); |
| 3070 | 3172 | ||
| @@ -3087,6 +3189,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, | |||
| 3087 | mod->init_ro_size = 0; | 3189 | mod->init_ro_size = 0; |
| 3088 | mod->init_text_size = 0; | 3190 | mod->init_text_size = 0; |
| 3089 | mutex_unlock(&module_mutex); | 3191 | mutex_unlock(&module_mutex); |
| 3192 | wake_up_all(&module_wq); | ||
| 3090 | 3193 | ||
| 3091 | return 0; | 3194 | return 0; |
| 3092 | } | 3195 | } |
diff --git a/kernel/module_signing.c b/kernel/module_signing.c new file mode 100644 index 000000000000..6b09f6983ac0 --- /dev/null +++ b/kernel/module_signing.c | |||
| @@ -0,0 +1,243 @@ | |||
| 1 | /* Module signature checker | ||
| 2 | * | ||
| 3 | * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. | ||
| 4 | * Written by David Howells (dhowells@redhat.com) | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU General Public Licence | ||
| 8 | * as published by the Free Software Foundation; either version | ||
| 9 | * 2 of the Licence, or (at your option) any later version. | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include <linux/kernel.h> | ||
| 13 | #include <linux/err.h> | ||
| 14 | #include <crypto/public_key.h> | ||
| 15 | #include <crypto/hash.h> | ||
| 16 | #include <keys/asymmetric-type.h> | ||
| 17 | #include "module-internal.h" | ||
| 18 | |||
| 19 | /* | ||
| 20 | * Module signature information block. | ||
| 21 | * | ||
| 22 | * The constituents of the signature section are, in order: | ||
| 23 | * | ||
| 24 | * - Signer's name | ||
| 25 | * - Key identifier | ||
| 26 | * - Signature data | ||
| 27 | * - Information block | ||
| 28 | */ | ||
| 29 | struct module_signature { | ||
| 30 | enum pkey_algo algo : 8; /* Public-key crypto algorithm */ | ||
| 31 | enum pkey_hash_algo hash : 8; /* Digest algorithm */ | ||
| 32 | enum pkey_id_type id_type : 8; /* Key identifier type */ | ||
| 33 | u8 signer_len; /* Length of signer's name */ | ||
| 34 | u8 key_id_len; /* Length of key identifier */ | ||
| 35 | u8 __pad[3]; | ||
| 36 | __be32 sig_len; /* Length of signature data */ | ||
| 37 | }; | ||
| 38 | |||
| 39 | /* | ||
| 40 | * Digest the module contents. | ||
| 41 | */ | ||
| 42 | static struct public_key_signature *mod_make_digest(enum pkey_hash_algo hash, | ||
| 43 | const void *mod, | ||
| 44 | unsigned long modlen) | ||
| 45 | { | ||
| 46 | struct public_key_signature *pks; | ||
| 47 | struct crypto_shash *tfm; | ||
| 48 | struct shash_desc *desc; | ||
| 49 | size_t digest_size, desc_size; | ||
| 50 | int ret; | ||
| 51 | |||
| 52 | pr_devel("==>%s()\n", __func__); | ||
| 53 | |||
| 54 | /* Allocate the hashing algorithm we're going to need and find out how | ||
| 55 | * big the hash operational data will be. | ||
| 56 | */ | ||
| 57 | tfm = crypto_alloc_shash(pkey_hash_algo[hash], 0, 0); | ||
| 58 | if (IS_ERR(tfm)) | ||
| 59 | return (PTR_ERR(tfm) == -ENOENT) ? ERR_PTR(-ENOPKG) : ERR_CAST(tfm); | ||
| 60 | |||
| 61 | desc_size = crypto_shash_descsize(tfm) + sizeof(*desc); | ||
| 62 | digest_size = crypto_shash_digestsize(tfm); | ||
| 63 | |||
| 64 | /* We allocate the hash operational data storage on the end of our | ||
| 65 | * context data and the digest output buffer on the end of that. | ||
| 66 | */ | ||
| 67 | ret = -ENOMEM; | ||
| 68 | pks = kzalloc(digest_size + sizeof(*pks) + desc_size, GFP_KERNEL); | ||
| 69 | if (!pks) | ||
| 70 | goto error_no_pks; | ||
| 71 | |||
| 72 | pks->pkey_hash_algo = hash; | ||
| 73 | pks->digest = (u8 *)pks + sizeof(*pks) + desc_size; | ||
| 74 | pks->digest_size = digest_size; | ||
| 75 | |||
| 76 | desc = (void *)pks + sizeof(*pks); | ||
| 77 | desc->tfm = tfm; | ||
| 78 | desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; | ||
| 79 | |||
| 80 | ret = crypto_shash_init(desc); | ||
| 81 | if (ret < 0) | ||
| 82 | goto error; | ||
| 83 | |||
| 84 | ret = crypto_shash_finup(desc, mod, modlen, pks->digest); | ||
| 85 | if (ret < 0) | ||
| 86 | goto error; | ||
| 87 | |||
| 88 | crypto_free_shash(tfm); | ||
| 89 | pr_devel("<==%s() = ok\n", __func__); | ||
| 90 | return pks; | ||
| 91 | |||
| 92 | error: | ||
| 93 | kfree(pks); | ||
| 94 | error_no_pks: | ||
| 95 | crypto_free_shash(tfm); | ||
| 96 | pr_devel("<==%s() = %d\n", __func__, ret); | ||
| 97 | return ERR_PTR(ret); | ||
| 98 | } | ||
| 99 | |||
| 100 | /* | ||
| 101 | * Extract an MPI array from the signature data. This represents the actual | ||
| 102 | * signature. Each raw MPI is prefaced by a BE 2-byte value indicating the | ||
| 103 | * size of the MPI in bytes. | ||
| 104 | * | ||
| 105 | * RSA signatures only have one MPI, so currently we only read one. | ||
| 106 | */ | ||
| 107 | static int mod_extract_mpi_array(struct public_key_signature *pks, | ||
| 108 | const void *data, size_t len) | ||
| 109 | { | ||
| 110 | size_t nbytes; | ||
| 111 | MPI mpi; | ||
| 112 | |||
| 113 | if (len < 3) | ||
| 114 | return -EBADMSG; | ||
| 115 | nbytes = ((const u8 *)data)[0] << 8 | ((const u8 *)data)[1]; | ||
| 116 | data += 2; | ||
| 117 | len -= 2; | ||
| 118 | if (len != nbytes) | ||
| 119 | return -EBADMSG; | ||
| 120 | |||
| 121 | mpi = mpi_read_raw_data(data, nbytes); | ||
| 122 | if (!mpi) | ||
| 123 | return -ENOMEM; | ||
| 124 | pks->mpi[0] = mpi; | ||
| 125 | pks->nr_mpi = 1; | ||
| 126 | return 0; | ||
| 127 | } | ||
| 128 | |||
| 129 | /* | ||
| 130 | * Request an asymmetric key. | ||
| 131 | */ | ||
| 132 | static struct key *request_asymmetric_key(const char *signer, size_t signer_len, | ||
| 133 | const u8 *key_id, size_t key_id_len) | ||
| 134 | { | ||
| 135 | key_ref_t key; | ||
| 136 | size_t i; | ||
| 137 | char *id, *q; | ||
| 138 | |||
| 139 | pr_devel("==>%s(,%zu,,%zu)\n", __func__, signer_len, key_id_len); | ||
| 140 | |||
| 141 | /* Construct an identifier. */ | ||
| 142 | id = kmalloc(signer_len + 2 + key_id_len * 2 + 1, GFP_KERNEL); | ||
| 143 | if (!id) | ||
| 144 | return ERR_PTR(-ENOKEY); | ||
| 145 | |||
| 146 | memcpy(id, signer, signer_len); | ||
| 147 | |||
| 148 | q = id + signer_len; | ||
| 149 | *q++ = ':'; | ||
| 150 | *q++ = ' '; | ||
| 151 | for (i = 0; i < key_id_len; i++) { | ||
| 152 | *q++ = hex_asc[*key_id >> 4]; | ||
| 153 | *q++ = hex_asc[*key_id++ & 0x0f]; | ||
| 154 | } | ||
| 155 | |||
| 156 | *q = 0; | ||
| 157 | |||
| 158 | pr_debug("Look up: \"%s\"\n", id); | ||
| 159 | |||
| 160 | key = keyring_search(make_key_ref(modsign_keyring, 1), | ||
| 161 | &key_type_asymmetric, id); | ||
| 162 | if (IS_ERR(key)) | ||
| 163 | pr_warn("Request for unknown module key '%s' err %ld\n", | ||
| 164 | id, PTR_ERR(key)); | ||
| 165 | kfree(id); | ||
| 166 | |||
| 167 | if (IS_ERR(key)) { | ||
| 168 | switch (PTR_ERR(key)) { | ||
| 169 | /* Hide some search errors */ | ||
| 170 | case -EACCES: | ||
| 171 | case -ENOTDIR: | ||
| 172 | case -EAGAIN: | ||
| 173 | return ERR_PTR(-ENOKEY); | ||
| 174 | default: | ||
| 175 | return ERR_CAST(key); | ||
| 176 | } | ||
| 177 | } | ||
| 178 | |||
| 179 | pr_devel("<==%s() = 0 [%x]\n", __func__, key_serial(key_ref_to_ptr(key))); | ||
| 180 | return key_ref_to_ptr(key); | ||
| 181 | } | ||
| 182 | |||
| 183 | /* | ||
| 184 | * Verify the signature on a module. | ||
| 185 | */ | ||
| 186 | int mod_verify_sig(const void *mod, unsigned long modlen, | ||
| 187 | const void *sig, unsigned long siglen) | ||
| 188 | { | ||
| 189 | struct public_key_signature *pks; | ||
| 190 | struct module_signature ms; | ||
| 191 | struct key *key; | ||
| 192 | size_t sig_len; | ||
| 193 | int ret; | ||
| 194 | |||
| 195 | pr_devel("==>%s(,%lu,,%lu,)\n", __func__, modlen, siglen); | ||
| 196 | |||
| 197 | if (siglen <= sizeof(ms)) | ||
| 198 | return -EBADMSG; | ||
| 199 | |||
| 200 | memcpy(&ms, sig + (siglen - sizeof(ms)), sizeof(ms)); | ||
| 201 | siglen -= sizeof(ms); | ||
| 202 | |||
| 203 | sig_len = be32_to_cpu(ms.sig_len); | ||
| 204 | if (sig_len >= siglen || | ||
| 205 | siglen - sig_len != (size_t)ms.signer_len + ms.key_id_len) | ||
| 206 | return -EBADMSG; | ||
| 207 | |||
| 208 | /* For the moment, only support RSA and X.509 identifiers */ | ||
| 209 | if (ms.algo != PKEY_ALGO_RSA || | ||
| 210 | ms.id_type != PKEY_ID_X509) | ||
| 211 | return -ENOPKG; | ||
| 212 | |||
| 213 | if (ms.hash >= PKEY_HASH__LAST || | ||
| 214 | !pkey_hash_algo[ms.hash]) | ||
| 215 | return -ENOPKG; | ||
| 216 | |||
| 217 | key = request_asymmetric_key(sig, ms.signer_len, | ||
| 218 | sig + ms.signer_len, ms.key_id_len); | ||
| 219 | if (IS_ERR(key)) | ||
| 220 | return PTR_ERR(key); | ||
| 221 | |||
| 222 | pks = mod_make_digest(ms.hash, mod, modlen); | ||
| 223 | if (IS_ERR(pks)) { | ||
| 224 | ret = PTR_ERR(pks); | ||
| 225 | goto error_put_key; | ||
| 226 | } | ||
| 227 | |||
| 228 | ret = mod_extract_mpi_array(pks, sig + ms.signer_len + ms.key_id_len, | ||
| 229 | sig_len); | ||
| 230 | if (ret < 0) | ||
| 231 | goto error_free_pks; | ||
| 232 | |||
| 233 | ret = verify_signature(key, pks); | ||
| 234 | pr_devel("verify_signature() = %d\n", ret); | ||
| 235 | |||
| 236 | error_free_pks: | ||
| 237 | mpi_free(pks->rsa.s); | ||
| 238 | kfree(pks); | ||
| 239 | error_put_key: | ||
| 240 | key_put(key); | ||
| 241 | pr_devel("<==%s() = %d\n", __func__, ret); | ||
| 242 | return ret; | ||
| 243 | } | ||
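mod_verify_sig() above slices the appended blob from the end: a fixed-size module_signature trailer, preceded by the signature data, the key identifier and the signer's name, with the big-endian sig_len plus the two name lengths required to account for everything before the trailer. A standalone sketch of that bounds check on fabricated data; the plain-integer struct below mirrors the layout shown in the hunk and is an assumption, not the kernel definition:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct trailer {
        uint8_t  algo, hash, id_type;
        uint8_t  signer_len, key_id_len;
        uint8_t  pad[3];
        uint32_t sig_len;               /* big-endian on the wire */
};

int main(void)
{
        /* "signer" (6) + key id (2) + signature (4) + trailer */
        unsigned char blob[6 + 2 + 4 + sizeof(struct trailer)];
        struct trailer ms;
        size_t siglen = sizeof(blob), sig_len;

        memset(blob, 0, sizeof(blob));
        memcpy(blob + siglen - sizeof(ms),
               &(struct trailer){ .signer_len = 6, .key_id_len = 2,
                                  .sig_len = htonl(4) }, sizeof(ms));

        memcpy(&ms, blob + siglen - sizeof(ms), sizeof(ms));
        siglen -= sizeof(ms);
        sig_len = ntohl(ms.sig_len);
        if (sig_len >= siglen ||
            siglen - sig_len != (size_t)ms.signer_len + ms.key_id_len) {
                fprintf(stderr, "malformed signature trailer\n");
                return 1;
        }
        printf("signer %u bytes, key id %u bytes, signature %zu bytes\n",
               (unsigned)ms.signer_len, (unsigned)ms.key_id_len, sig_len);
        return 0;
}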
diff --git a/kernel/pid.c b/kernel/pid.c index e86b291ad834..aebd4f5aaf41 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
| @@ -479,6 +479,7 @@ pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns) | |||
| 479 | } | 479 | } |
| 480 | return nr; | 480 | return nr; |
| 481 | } | 481 | } |
| 482 | EXPORT_SYMBOL_GPL(pid_nr_ns); | ||
| 482 | 483 | ||
| 483 | pid_t pid_vnr(struct pid *pid) | 484 | pid_t pid_vnr(struct pid *pid) |
| 484 | { | 485 | { |
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 6144bab8fd8e..478bad2745e3 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
| 17 | #include <linux/proc_fs.h> | 17 | #include <linux/proc_fs.h> |
| 18 | #include <linux/reboot.h> | 18 | #include <linux/reboot.h> |
| 19 | #include <linux/export.h> | ||
| 19 | 20 | ||
| 20 | #define BITS_PER_PAGE (PAGE_SIZE*8) | 21 | #define BITS_PER_PAGE (PAGE_SIZE*8) |
| 21 | 22 | ||
| @@ -144,6 +145,7 @@ void free_pid_ns(struct kref *kref) | |||
| 144 | if (parent != NULL) | 145 | if (parent != NULL) |
| 145 | put_pid_ns(parent); | 146 | put_pid_ns(parent); |
| 146 | } | 147 | } |
| 148 | EXPORT_SYMBOL_GPL(free_pid_ns); | ||
| 147 | 149 | ||
| 148 | void zap_pid_ns_processes(struct pid_namespace *pid_ns) | 150 | void zap_pid_ns_processes(struct pid_namespace *pid_ns) |
| 149 | { | 151 | { |
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index a70518c9d82f..5dfdc9ea180b 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig | |||
| @@ -263,6 +263,10 @@ config PM_GENERIC_DOMAINS | |||
| 263 | bool | 263 | bool |
| 264 | depends on PM | 264 | depends on PM |
| 265 | 265 | ||
| 266 | config PM_GENERIC_DOMAINS_SLEEP | ||
| 267 | def_bool y | ||
| 268 | depends on PM_SLEEP && PM_GENERIC_DOMAINS | ||
| 269 | |||
| 266 | config PM_GENERIC_DOMAINS_RUNTIME | 270 | config PM_GENERIC_DOMAINS_RUNTIME |
| 267 | def_bool y | 271 | def_bool y |
| 268 | depends on PM_RUNTIME && PM_GENERIC_DOMAINS | 272 | depends on PM_RUNTIME && PM_GENERIC_DOMAINS |
diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c index d52359374e85..68197a4e8fc9 100644 --- a/kernel/power/poweroff.c +++ b/kernel/power/poweroff.c | |||
| @@ -37,7 +37,7 @@ static struct sysrq_key_op sysrq_poweroff_op = { | |||
| 37 | .enable_mask = SYSRQ_ENABLE_BOOT, | 37 | .enable_mask = SYSRQ_ENABLE_BOOT, |
| 38 | }; | 38 | }; |
| 39 | 39 | ||
| 40 | static int pm_sysrq_init(void) | 40 | static int __init pm_sysrq_init(void) |
| 41 | { | 41 | { |
| 42 | register_sysrq_key('o', &sysrq_poweroff_op); | 42 | register_sysrq_key('o', &sysrq_poweroff_op); |
| 43 | return 0; | 43 | return 0; |
diff --git a/kernel/power/process.c b/kernel/power/process.c index 19db29f67558..87da817f9e13 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c | |||
| @@ -79,7 +79,7 @@ static int try_to_freeze_tasks(bool user_only) | |||
| 79 | 79 | ||
| 80 | /* | 80 | /* |
| 81 | * We need to retry, but first give the freezing tasks some | 81 | * We need to retry, but first give the freezing tasks some |
| 82 | * time to enter the regrigerator. | 82 | * time to enter the refrigerator. |
| 83 | */ | 83 | */ |
| 84 | msleep(10); | 84 | msleep(10); |
| 85 | } | 85 | } |
diff --git a/kernel/power/qos.c b/kernel/power/qos.c index 6a031e684026..846bd42c7ed1 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c | |||
| @@ -139,6 +139,7 @@ static inline int pm_qos_get_value(struct pm_qos_constraints *c) | |||
| 139 | default: | 139 | default: |
| 140 | /* runtime check for not using enum */ | 140 | /* runtime check for not using enum */ |
| 141 | BUG(); | 141 | BUG(); |
| 142 | return PM_QOS_DEFAULT_VALUE; | ||
| 142 | } | 143 | } |
| 143 | } | 144 | } |
| 144 | 145 | ||
diff --git a/kernel/printk.c b/kernel/printk.c index 66a2ea37b576..2d607f4d1797 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
| @@ -1890,7 +1890,6 @@ static int __cpuinit console_cpu_notify(struct notifier_block *self, | |||
| 1890 | switch (action) { | 1890 | switch (action) { |
| 1891 | case CPU_ONLINE: | 1891 | case CPU_ONLINE: |
| 1892 | case CPU_DEAD: | 1892 | case CPU_DEAD: |
| 1893 | case CPU_DYING: | ||
| 1894 | case CPU_DOWN_FAILED: | 1893 | case CPU_DOWN_FAILED: |
| 1895 | case CPU_UP_CANCELED: | 1894 | case CPU_UP_CANCELED: |
| 1896 | console_lock(); | 1895 | console_lock(); |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index a232bb59d93f..1f5e55dda955 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
| @@ -180,7 +180,8 @@ static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode) | |||
| 180 | return has_ns_capability(current, ns, CAP_SYS_PTRACE); | 180 | return has_ns_capability(current, ns, CAP_SYS_PTRACE); |
| 181 | } | 181 | } |
| 182 | 182 | ||
| 183 | int __ptrace_may_access(struct task_struct *task, unsigned int mode) | 183 | /* Returns 0 on success, -errno on denial. */ |
| 184 | static int __ptrace_may_access(struct task_struct *task, unsigned int mode) | ||
| 184 | { | 185 | { |
| 185 | const struct cred *cred = current_cred(), *tcred; | 186 | const struct cred *cred = current_cred(), *tcred; |
| 186 | 187 | ||
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 4fb2376ddf06..74df86bd9204 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
| @@ -74,6 +74,7 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS]; | |||
| 74 | .orphan_nxttail = &sname##_state.orphan_nxtlist, \ | 74 | .orphan_nxttail = &sname##_state.orphan_nxtlist, \ |
| 75 | .orphan_donetail = &sname##_state.orphan_donelist, \ | 75 | .orphan_donetail = &sname##_state.orphan_donelist, \ |
| 76 | .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ | 76 | .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ |
| 77 | .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \ | ||
| 77 | .name = #sname, \ | 78 | .name = #sname, \ |
| 78 | } | 79 | } |
| 79 | 80 | ||
| @@ -1197,7 +1198,7 @@ static int rcu_gp_init(struct rcu_state *rsp) | |||
| 1197 | raw_spin_unlock_irq(&rnp->lock); | 1198 | raw_spin_unlock_irq(&rnp->lock); |
| 1198 | 1199 | ||
| 1199 | /* Exclude any concurrent CPU-hotplug operations. */ | 1200 | /* Exclude any concurrent CPU-hotplug operations. */ |
| 1200 | get_online_cpus(); | 1201 | mutex_lock(&rsp->onoff_mutex); |
| 1201 | 1202 | ||
| 1202 | /* | 1203 | /* |
| 1203 | * Set the quiescent-state-needed bits in all the rcu_node | 1204 | * Set the quiescent-state-needed bits in all the rcu_node |
| @@ -1234,7 +1235,7 @@ static int rcu_gp_init(struct rcu_state *rsp) | |||
| 1234 | cond_resched(); | 1235 | cond_resched(); |
| 1235 | } | 1236 | } |
| 1236 | 1237 | ||
| 1237 | put_online_cpus(); | 1238 | mutex_unlock(&rsp->onoff_mutex); |
| 1238 | return 1; | 1239 | return 1; |
| 1239 | } | 1240 | } |
| 1240 | 1241 | ||
| @@ -1700,6 +1701,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) | |||
| 1700 | /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */ | 1701 | /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */ |
| 1701 | 1702 | ||
| 1702 | /* Exclude any attempts to start a new grace period. */ | 1703 | /* Exclude any attempts to start a new grace period. */ |
| 1704 | mutex_lock(&rsp->onoff_mutex); | ||
| 1703 | raw_spin_lock_irqsave(&rsp->onofflock, flags); | 1705 | raw_spin_lock_irqsave(&rsp->onofflock, flags); |
| 1704 | 1706 | ||
| 1705 | /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */ | 1707 | /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */ |
| @@ -1744,6 +1746,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) | |||
| 1744 | init_callback_list(rdp); | 1746 | init_callback_list(rdp); |
| 1745 | /* Disallow further callbacks on this CPU. */ | 1747 | /* Disallow further callbacks on this CPU. */ |
| 1746 | rdp->nxttail[RCU_NEXT_TAIL] = NULL; | 1748 | rdp->nxttail[RCU_NEXT_TAIL] = NULL; |
| 1749 | mutex_unlock(&rsp->onoff_mutex); | ||
| 1747 | } | 1750 | } |
| 1748 | 1751 | ||
| 1749 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ | 1752 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ |
| @@ -2648,6 +2651,9 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | |||
| 2648 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | 2651 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
| 2649 | struct rcu_node *rnp = rcu_get_root(rsp); | 2652 | struct rcu_node *rnp = rcu_get_root(rsp); |
| 2650 | 2653 | ||
| 2654 | /* Exclude new grace periods. */ | ||
| 2655 | mutex_lock(&rsp->onoff_mutex); | ||
| 2656 | |||
| 2651 | /* Set up local state, ensuring consistent view of global state. */ | 2657 | /* Set up local state, ensuring consistent view of global state. */ |
| 2652 | raw_spin_lock_irqsave(&rnp->lock, flags); | 2658 | raw_spin_lock_irqsave(&rnp->lock, flags); |
| 2653 | rdp->beenonline = 1; /* We have now been online. */ | 2659 | rdp->beenonline = 1; /* We have now been online. */ |
| @@ -2662,14 +2668,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | |||
| 2662 | rcu_prepare_for_idle_init(cpu); | 2668 | rcu_prepare_for_idle_init(cpu); |
| 2663 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 2669 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
| 2664 | 2670 | ||
| 2665 | /* | ||
| 2666 | * A new grace period might start here. If so, we won't be part | ||
| 2667 | * of it, but that is OK, as we are currently in a quiescent state. | ||
| 2668 | */ | ||
| 2669 | |||
| 2670 | /* Exclude any attempts to start a new GP on large systems. */ | ||
| 2671 | raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ | ||
| 2672 | |||
| 2673 | /* Add CPU to rcu_node bitmasks. */ | 2671 | /* Add CPU to rcu_node bitmasks. */ |
| 2674 | rnp = rdp->mynode; | 2672 | rnp = rdp->mynode; |
| 2675 | mask = rdp->grpmask; | 2673 | mask = rdp->grpmask; |
| @@ -2693,8 +2691,9 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | |||
| 2693 | raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ | 2691 | raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ |
| 2694 | rnp = rnp->parent; | 2692 | rnp = rnp->parent; |
| 2695 | } while (rnp != NULL && !(rnp->qsmaskinit & mask)); | 2693 | } while (rnp != NULL && !(rnp->qsmaskinit & mask)); |
| 2694 | local_irq_restore(flags); | ||
| 2696 | 2695 | ||
| 2697 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | 2696 | mutex_unlock(&rsp->onoff_mutex); |
| 2698 | } | 2697 | } |
| 2699 | 2698 | ||
| 2700 | static void __cpuinit rcu_prepare_cpu(int cpu) | 2699 | static void __cpuinit rcu_prepare_cpu(int cpu) |
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 5faf05d68326..a240f032848e 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
| @@ -394,11 +394,17 @@ struct rcu_state { | |||
| 394 | struct rcu_head **orphan_donetail; /* Tail of above. */ | 394 | struct rcu_head **orphan_donetail; /* Tail of above. */ |
| 395 | long qlen_lazy; /* Number of lazy callbacks. */ | 395 | long qlen_lazy; /* Number of lazy callbacks. */ |
| 396 | long qlen; /* Total number of callbacks. */ | 396 | long qlen; /* Total number of callbacks. */ |
| 397 | /* End of fields guarded by onofflock. */ | ||
| 398 | |||
| 399 | struct mutex onoff_mutex; /* Coordinate hotplug & GPs. */ | ||
| 400 | |||
| 397 | struct mutex barrier_mutex; /* Guards barrier fields. */ | 401 | struct mutex barrier_mutex; /* Guards barrier fields. */ |
| 398 | atomic_t barrier_cpu_count; /* # CPUs waiting on. */ | 402 | atomic_t barrier_cpu_count; /* # CPUs waiting on. */ |
| 399 | struct completion barrier_completion; /* Wake at barrier end. */ | 403 | struct completion barrier_completion; /* Wake at barrier end. */ |
| 400 | unsigned long n_barrier_done; /* ++ at start and end of */ | 404 | unsigned long n_barrier_done; /* ++ at start and end of */ |
| 401 | /* _rcu_barrier(). */ | 405 | /* _rcu_barrier(). */ |
| 406 | /* End of fields guarded by barrier_mutex. */ | ||
| 407 | |||
| 402 | unsigned long jiffies_force_qs; /* Time at which to invoke */ | 408 | unsigned long jiffies_force_qs; /* Time at which to invoke */ |
| 403 | /* force_quiescent_state(). */ | 409 | /* force_quiescent_state(). */ |
| 404 | unsigned long n_force_qs; /* Number of calls to */ | 410 | unsigned long n_force_qs; /* Number of calls to */ |
diff --git a/kernel/resource.c b/kernel/resource.c index 34d45886ee84..73f35d4b30b9 100644 --- a/kernel/resource.c +++ b/kernel/resource.c | |||
| @@ -763,6 +763,7 @@ static void __init __reserve_region_with_split(struct resource *root, | |||
| 763 | struct resource *parent = root; | 763 | struct resource *parent = root; |
| 764 | struct resource *conflict; | 764 | struct resource *conflict; |
| 765 | struct resource *res = kzalloc(sizeof(*res), GFP_ATOMIC); | 765 | struct resource *res = kzalloc(sizeof(*res), GFP_ATOMIC); |
| 766 | struct resource *next_res = NULL; | ||
| 766 | 767 | ||
| 767 | if (!res) | 768 | if (!res) |
| 768 | return; | 769 | return; |
| @@ -772,21 +773,46 @@ static void __init __reserve_region_with_split(struct resource *root, | |||
| 772 | res->end = end; | 773 | res->end = end; |
| 773 | res->flags = IORESOURCE_BUSY; | 774 | res->flags = IORESOURCE_BUSY; |
| 774 | 775 | ||
| 775 | conflict = __request_resource(parent, res); | 776 | while (1) { |
| 776 | if (!conflict) | ||
| 777 | return; | ||
| 778 | 777 | ||
| 779 | /* failed, split and try again */ | 778 | conflict = __request_resource(parent, res); |
| 780 | kfree(res); | 779 | if (!conflict) { |
| 780 | if (!next_res) | ||
| 781 | break; | ||
| 782 | res = next_res; | ||
| 783 | next_res = NULL; | ||
| 784 | continue; | ||
| 785 | } | ||
| 781 | 786 | ||
| 782 | /* conflict covered whole area */ | 787 | /* conflict covered whole area */ |
| 783 | if (conflict->start <= start && conflict->end >= end) | 788 | if (conflict->start <= res->start && |
| 784 | return; | 789 | conflict->end >= res->end) { |
| 790 | kfree(res); | ||
| 791 | WARN_ON(next_res); | ||
| 792 | break; | ||
| 793 | } | ||
| 794 | |||
| 795 | /* failed, split and try again */ | ||
| 796 | if (conflict->start > res->start) { | ||
| 797 | end = res->end; | ||
| 798 | res->end = conflict->start - 1; | ||
| 799 | if (conflict->end < end) { | ||
| 800 | next_res = kzalloc(sizeof(*next_res), | ||
| 801 | GFP_ATOMIC); | ||
| 802 | if (!next_res) { | ||
| 803 | kfree(res); | ||
| 804 | break; | ||
| 805 | } | ||
| 806 | next_res->name = name; | ||
| 807 | next_res->start = conflict->end + 1; | ||
| 808 | next_res->end = end; | ||
| 809 | next_res->flags = IORESOURCE_BUSY; | ||
| 810 | } | ||
| 811 | } else { | ||
| 812 | res->start = conflict->end + 1; | ||
| 813 | } | ||
| 814 | } | ||
| 785 | 815 | ||
| 786 | if (conflict->start > start) | ||
| 787 | __reserve_region_with_split(root, start, conflict->start-1, name); | ||
| 788 | if (conflict->end < end) | ||
| 789 | __reserve_region_with_split(root, conflict->end+1, end, name); | ||
| 790 | } | 816 | } |
| 791 | 817 | ||
| 792 | void __init reserve_region_with_split(struct resource *root, | 818 | void __init reserve_region_with_split(struct resource *root, |
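Editor's note: the hunk above replaces the tail recursion in __reserve_region_with_split() with a loop that carries at most one pending right-hand remainder, so a long run of overlapping conflicts no longer grows the kernel stack. The following stand-alone user-space program is a rough sketch of the same split-and-retry shape; the conflict table and every name in it are illustrative, not kernel API.

#include <stdio.h>

struct range { long start, end; };

/* Hypothetical already-busy regions, standing in for __request_resource(). */
static const struct range busy[] = { { 20, 29 }, { 50, 59 } };

static const struct range *find_conflict(long start, long end)
{
    unsigned int i;

    for (i = 0; i < sizeof(busy) / sizeof(busy[0]); i++)
        if (busy[i].start <= end && busy[i].end >= start)
            return &busy[i];
    return NULL;
}

static void reserve_with_split(long start, long end)
{
    struct range cur = { start, end };
    struct range next = { 0, 0 };
    int have_next = 0;

    for (;;) {
        const struct range *c = find_conflict(cur.start, cur.end);

        if (!c) {
            printf("reserved [%ld, %ld]\n", cur.start, cur.end);
            if (!have_next)
                break;
            cur = next;              /* now handle the saved right half */
            have_next = 0;
            continue;
        }
        if (c->start <= cur.start && c->end >= cur.end)
            break;                   /* conflict covers the whole range */

        if (c->start > cur.start) {
            if (c->end < cur.end) {  /* conflict in the middle: save right half */
                next.start = c->end + 1;
                next.end = cur.end;
                have_next = 1;
            }
            cur.end = c->start - 1;  /* keep reserving the left part */
        } else {
            cur.start = c->end + 1;  /* conflict clipped the left edge */
        }
    }
}

int main(void)
{
    reserve_with_split(0, 99);       /* prints [0,19], [30,49], [60,99] */
    return 0;
}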
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c17747236438..2d8927fda712 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
| @@ -505,7 +505,7 @@ static inline void init_hrtick(void) | |||
| 505 | #ifdef CONFIG_SMP | 505 | #ifdef CONFIG_SMP |
| 506 | 506 | ||
| 507 | #ifndef tsk_is_polling | 507 | #ifndef tsk_is_polling |
| 508 | #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) | 508 | #define tsk_is_polling(t) 0 |
| 509 | #endif | 509 | #endif |
| 510 | 510 | ||
| 511 | void resched_task(struct task_struct *p) | 511 | void resched_task(struct task_struct *p) |
| @@ -6122,6 +6122,17 @@ static void sched_init_numa(void) | |||
| 6122 | * numbers. | 6122 | * numbers. |
| 6123 | */ | 6123 | */ |
| 6124 | 6124 | ||
| 6125 | /* | ||
| 6126 | * Here, we should temporarily reset sched_domains_numa_levels to 0. | ||
| 6127 | * If it fails to allocate memory for array sched_domains_numa_masks[][], | ||
| 6128 | * the array will contain less then 'level' members. This could be | ||
| 6129 | * dangerous when we use it to iterate array sched_domains_numa_masks[][] | ||
| 6130 | * in other functions. | ||
| 6131 | * | ||
| 6132 | * We reset it to 'level' at the end of this function. | ||
| 6133 | */ | ||
| 6134 | sched_domains_numa_levels = 0; | ||
| 6135 | |||
| 6125 | sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL); | 6136 | sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL); |
| 6126 | if (!sched_domains_numa_masks) | 6137 | if (!sched_domains_numa_masks) |
| 6127 | return; | 6138 | return; |
| @@ -6176,11 +6187,68 @@ static void sched_init_numa(void) | |||
| 6176 | } | 6187 | } |
| 6177 | 6188 | ||
| 6178 | sched_domain_topology = tl; | 6189 | sched_domain_topology = tl; |
| 6190 | |||
| 6191 | sched_domains_numa_levels = level; | ||
| 6192 | } | ||
| 6193 | |||
| 6194 | static void sched_domains_numa_masks_set(int cpu) | ||
| 6195 | { | ||
| 6196 | int i, j; | ||
| 6197 | int node = cpu_to_node(cpu); | ||
| 6198 | |||
| 6199 | for (i = 0; i < sched_domains_numa_levels; i++) { | ||
| 6200 | for (j = 0; j < nr_node_ids; j++) { | ||
| 6201 | if (node_distance(j, node) <= sched_domains_numa_distance[i]) | ||
| 6202 | cpumask_set_cpu(cpu, sched_domains_numa_masks[i][j]); | ||
| 6203 | } | ||
| 6204 | } | ||
| 6205 | } | ||
| 6206 | |||
| 6207 | static void sched_domains_numa_masks_clear(int cpu) | ||
| 6208 | { | ||
| 6209 | int i, j; | ||
| 6210 | for (i = 0; i < sched_domains_numa_levels; i++) { | ||
| 6211 | for (j = 0; j < nr_node_ids; j++) | ||
| 6212 | cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]); | ||
| 6213 | } | ||
| 6214 | } | ||
| 6215 | |||
| 6216 | /* | ||
| 6217 | * Update sched_domains_numa_masks[level][node] array when new cpus | ||
| 6218 | * are onlined. | ||
| 6219 | */ | ||
| 6220 | static int sched_domains_numa_masks_update(struct notifier_block *nfb, | ||
| 6221 | unsigned long action, | ||
| 6222 | void *hcpu) | ||
| 6223 | { | ||
| 6224 | int cpu = (long)hcpu; | ||
| 6225 | |||
| 6226 | switch (action & ~CPU_TASKS_FROZEN) { | ||
| 6227 | case CPU_ONLINE: | ||
| 6228 | sched_domains_numa_masks_set(cpu); | ||
| 6229 | break; | ||
| 6230 | |||
| 6231 | case CPU_DEAD: | ||
| 6232 | sched_domains_numa_masks_clear(cpu); | ||
| 6233 | break; | ||
| 6234 | |||
| 6235 | default: | ||
| 6236 | return NOTIFY_DONE; | ||
| 6237 | } | ||
| 6238 | |||
| 6239 | return NOTIFY_OK; | ||
| 6179 | } | 6240 | } |
| 6180 | #else | 6241 | #else |
| 6181 | static inline void sched_init_numa(void) | 6242 | static inline void sched_init_numa(void) |
| 6182 | { | 6243 | { |
| 6183 | } | 6244 | } |
| 6245 | |||
| 6246 | static int sched_domains_numa_masks_update(struct notifier_block *nfb, | ||
| 6247 | unsigned long action, | ||
| 6248 | void *hcpu) | ||
| 6249 | { | ||
| 6250 | return 0; | ||
| 6251 | } | ||
| 6184 | #endif /* CONFIG_NUMA */ | 6252 | #endif /* CONFIG_NUMA */ |
| 6185 | 6253 | ||
| 6186 | static int __sdt_alloc(const struct cpumask *cpu_map) | 6254 | static int __sdt_alloc(const struct cpumask *cpu_map) |
| @@ -6629,6 +6697,7 @@ void __init sched_init_smp(void) | |||
| 6629 | mutex_unlock(&sched_domains_mutex); | 6697 | mutex_unlock(&sched_domains_mutex); |
| 6630 | put_online_cpus(); | 6698 | put_online_cpus(); |
| 6631 | 6699 | ||
| 6700 | hotcpu_notifier(sched_domains_numa_masks_update, CPU_PRI_SCHED_ACTIVE); | ||
| 6632 | hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE); | 6701 | hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE); |
| 6633 | hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE); | 6702 | hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE); |
| 6634 | 6703 | ||
diff --git a/kernel/signal.c b/kernel/signal.c index 2c681f11b7d2..0af8868525d6 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
| 18 | #include <linux/tty.h> | 18 | #include <linux/tty.h> |
| 19 | #include <linux/binfmts.h> | 19 | #include <linux/binfmts.h> |
| 20 | #include <linux/coredump.h> | ||
| 20 | #include <linux/security.h> | 21 | #include <linux/security.h> |
| 21 | #include <linux/syscalls.h> | 22 | #include <linux/syscalls.h> |
| 22 | #include <linux/ptrace.h> | 23 | #include <linux/ptrace.h> |
| @@ -2359,7 +2360,7 @@ relock: | |||
| 2359 | * first and our do_group_exit call below will use | 2360 | * first and our do_group_exit call below will use |
| 2360 | * that value and ignore the one we pass it. | 2361 | * that value and ignore the one we pass it. |
| 2361 | */ | 2362 | */ |
| 2362 | do_coredump(info->si_signo, info->si_signo, regs); | 2363 | do_coredump(info, regs); |
| 2363 | } | 2364 | } |
| 2364 | 2365 | ||
| 2365 | /* | 2366 | /* |
diff --git a/kernel/srcu.c b/kernel/srcu.c index 2095be3318d5..97c465ebd844 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c | |||
| @@ -379,7 +379,7 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *head, | |||
| 379 | rcu_batch_queue(&sp->batch_queue, head); | 379 | rcu_batch_queue(&sp->batch_queue, head); |
| 380 | if (!sp->running) { | 380 | if (!sp->running) { |
| 381 | sp->running = true; | 381 | sp->running = true; |
| 382 | queue_delayed_work(system_nrt_wq, &sp->work, 0); | 382 | schedule_delayed_work(&sp->work, 0); |
| 383 | } | 383 | } |
| 384 | spin_unlock_irqrestore(&sp->queue_lock, flags); | 384 | spin_unlock_irqrestore(&sp->queue_lock, flags); |
| 385 | } | 385 | } |
| @@ -631,7 +631,7 @@ static void srcu_reschedule(struct srcu_struct *sp) | |||
| 631 | } | 631 | } |
| 632 | 632 | ||
| 633 | if (pending) | 633 | if (pending) |
| 634 | queue_delayed_work(system_nrt_wq, &sp->work, SRCU_INTERVAL); | 634 | schedule_delayed_work(&sp->work, SRCU_INTERVAL); |
| 635 | } | 635 | } |
| 636 | 636 | ||
| 637 | /* | 637 | /* |
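Editor's note: the two srcu.c hunks drop the dedicated non-reentrant workqueue; around this point in the series ordinary workqueues already behave non-reentrantly, so queuing on the system workqueue with schedule_delayed_work() is equivalent and system_nrt_wq can go away. A minimal sketch of that plain delayed-work pattern, with illustrative names only:

#include <linux/workqueue.h>

static void my_work_fn(struct work_struct *work)
{
    /* runs later in process context on the system workqueue */
}

static DECLARE_DELAYED_WORK(my_work, my_work_fn);

static void kick_work(void)
{
    /* a delay of 0 means "as soon as a worker is available" */
    schedule_delayed_work(&my_work, 0);
}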
diff --git a/kernel/sys.c b/kernel/sys.c index 241507f23eca..c5cb5b99cb81 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
| @@ -368,6 +368,7 @@ EXPORT_SYMBOL(unregister_reboot_notifier); | |||
| 368 | void kernel_restart(char *cmd) | 368 | void kernel_restart(char *cmd) |
| 369 | { | 369 | { |
| 370 | kernel_restart_prepare(cmd); | 370 | kernel_restart_prepare(cmd); |
| 371 | disable_nonboot_cpus(); | ||
| 371 | if (!cmd) | 372 | if (!cmd) |
| 372 | printk(KERN_EMERG "Restarting system.\n"); | 373 | printk(KERN_EMERG "Restarting system.\n"); |
| 373 | else | 374 | else |
| @@ -1788,15 +1789,15 @@ SYSCALL_DEFINE1(umask, int, mask) | |||
| 1788 | #ifdef CONFIG_CHECKPOINT_RESTORE | 1789 | #ifdef CONFIG_CHECKPOINT_RESTORE |
| 1789 | static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) | 1790 | static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) |
| 1790 | { | 1791 | { |
| 1791 | struct file *exe_file; | 1792 | struct fd exe; |
| 1792 | struct dentry *dentry; | 1793 | struct dentry *dentry; |
| 1793 | int err; | 1794 | int err; |
| 1794 | 1795 | ||
| 1795 | exe_file = fget(fd); | 1796 | exe = fdget(fd); |
| 1796 | if (!exe_file) | 1797 | if (!exe.file) |
| 1797 | return -EBADF; | 1798 | return -EBADF; |
| 1798 | 1799 | ||
| 1799 | dentry = exe_file->f_path.dentry; | 1800 | dentry = exe.file->f_path.dentry; |
| 1800 | 1801 | ||
| 1801 | /* | 1802 | /* |
| 1802 | * Because the original mm->exe_file points to executable file, make | 1803 | * Because the original mm->exe_file points to executable file, make |
| @@ -1805,7 +1806,7 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) | |||
| 1805 | */ | 1806 | */ |
| 1806 | err = -EACCES; | 1807 | err = -EACCES; |
| 1807 | if (!S_ISREG(dentry->d_inode->i_mode) || | 1808 | if (!S_ISREG(dentry->d_inode->i_mode) || |
| 1808 | exe_file->f_path.mnt->mnt_flags & MNT_NOEXEC) | 1809 | exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC) |
| 1809 | goto exit; | 1810 | goto exit; |
| 1810 | 1811 | ||
| 1811 | err = inode_permission(dentry->d_inode, MAY_EXEC); | 1812 | err = inode_permission(dentry->d_inode, MAY_EXEC); |
| @@ -1839,12 +1840,12 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) | |||
| 1839 | goto exit_unlock; | 1840 | goto exit_unlock; |
| 1840 | 1841 | ||
| 1841 | err = 0; | 1842 | err = 0; |
| 1842 | set_mm_exe_file(mm, exe_file); | 1843 | set_mm_exe_file(mm, exe.file); /* this grabs a reference to exe.file */ |
| 1843 | exit_unlock: | 1844 | exit_unlock: |
| 1844 | up_write(&mm->mmap_sem); | 1845 | up_write(&mm->mmap_sem); |
| 1845 | 1846 | ||
| 1846 | exit: | 1847 | exit: |
| 1847 | fput(exe_file); | 1848 | fdput(exe); |
| 1848 | return err; | 1849 | return err; |
| 1849 | } | 1850 | } |
| 1850 | 1851 | ||
| @@ -2204,7 +2205,7 @@ static int __orderly_poweroff(void) | |||
| 2204 | return -ENOMEM; | 2205 | return -ENOMEM; |
| 2205 | } | 2206 | } |
| 2206 | 2207 | ||
| 2207 | ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_NO_WAIT, | 2208 | ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_WAIT_EXEC, |
| 2208 | NULL, argv_cleanup, NULL); | 2209 | NULL, argv_cleanup, NULL); |
| 2209 | if (ret == -ENOMEM) | 2210 | if (ret == -ENOMEM) |
| 2210 | argv_free(argv); | 2211 | argv_free(argv); |
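Editor's note: three independent changes land in sys.c above: kernel_restart() quiesces non-boot CPUs before rebooting, orderly poweroff waits for the helper to exec rather than firing and forgetting, and prctl_set_mm_exe_file() moves from fget()/fput() to the lighter struct fd accessors. A hedged sketch of the struct fd pattern the prctl hunk converts to; use_fd() is an illustrative name, not a kernel function:

#include <linux/file.h>
#include <linux/errno.h>

static int use_fd(unsigned int fd)
{
    struct fd f = fdget(fd);
    int err = 0;

    if (!f.file)
        return -EBADF;

    /* ... work with f.file ... */

    fdput(f);   /* drops the reference only if fdget() actually took one */
    return err;
}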
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 81c7b1a1a307..26f65eaa01f9 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -97,10 +97,12 @@ | |||
| 97 | extern int sysctl_overcommit_memory; | 97 | extern int sysctl_overcommit_memory; |
| 98 | extern int sysctl_overcommit_ratio; | 98 | extern int sysctl_overcommit_ratio; |
| 99 | extern int max_threads; | 99 | extern int max_threads; |
| 100 | extern int core_uses_pid; | ||
| 101 | extern int suid_dumpable; | 100 | extern int suid_dumpable; |
| 101 | #ifdef CONFIG_COREDUMP | ||
| 102 | extern int core_uses_pid; | ||
| 102 | extern char core_pattern[]; | 103 | extern char core_pattern[]; |
| 103 | extern unsigned int core_pipe_limit; | 104 | extern unsigned int core_pipe_limit; |
| 105 | #endif | ||
| 104 | extern int pid_max; | 106 | extern int pid_max; |
| 105 | extern int min_free_kbytes; | 107 | extern int min_free_kbytes; |
| 106 | extern int pid_max_min, pid_max_max; | 108 | extern int pid_max_min, pid_max_max; |
| @@ -177,8 +179,10 @@ static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, | |||
| 177 | 179 | ||
| 178 | static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, | 180 | static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, |
| 179 | void __user *buffer, size_t *lenp, loff_t *ppos); | 181 | void __user *buffer, size_t *lenp, loff_t *ppos); |
| 182 | #ifdef CONFIG_COREDUMP | ||
| 180 | static int proc_dostring_coredump(struct ctl_table *table, int write, | 183 | static int proc_dostring_coredump(struct ctl_table *table, int write, |
| 181 | void __user *buffer, size_t *lenp, loff_t *ppos); | 184 | void __user *buffer, size_t *lenp, loff_t *ppos); |
| 185 | #endif | ||
| 182 | 186 | ||
| 183 | #ifdef CONFIG_MAGIC_SYSRQ | 187 | #ifdef CONFIG_MAGIC_SYSRQ |
| 184 | /* Note: sysrq code uses it's own private copy */ | 188 | /* Note: sysrq code uses it's own private copy */ |
| @@ -404,6 +408,7 @@ static struct ctl_table kern_table[] = { | |||
| 404 | .mode = 0644, | 408 | .mode = 0644, |
| 405 | .proc_handler = proc_dointvec, | 409 | .proc_handler = proc_dointvec, |
| 406 | }, | 410 | }, |
| 411 | #ifdef CONFIG_COREDUMP | ||
| 407 | { | 412 | { |
| 408 | .procname = "core_uses_pid", | 413 | .procname = "core_uses_pid", |
| 409 | .data = &core_uses_pid, | 414 | .data = &core_uses_pid, |
| @@ -425,6 +430,7 @@ static struct ctl_table kern_table[] = { | |||
| 425 | .mode = 0644, | 430 | .mode = 0644, |
| 426 | .proc_handler = proc_dointvec, | 431 | .proc_handler = proc_dointvec, |
| 427 | }, | 432 | }, |
| 433 | #endif | ||
| 428 | #ifdef CONFIG_PROC_SYSCTL | 434 | #ifdef CONFIG_PROC_SYSCTL |
| 429 | { | 435 | { |
| 430 | .procname = "tainted", | 436 | .procname = "tainted", |
| @@ -1543,8 +1549,7 @@ static struct ctl_table fs_table[] = { | |||
| 1543 | }; | 1549 | }; |
| 1544 | 1550 | ||
| 1545 | static struct ctl_table debug_table[] = { | 1551 | static struct ctl_table debug_table[] = { |
| 1546 | #if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || \ | 1552 | #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE |
| 1547 | defined(CONFIG_S390) || defined(CONFIG_TILE) | ||
| 1548 | { | 1553 | { |
| 1549 | .procname = "exception-trace", | 1554 | .procname = "exception-trace", |
| 1550 | .data = &show_unhandled_signals, | 1555 | .data = &show_unhandled_signals, |
| @@ -2036,12 +2041,14 @@ int proc_dointvec_minmax(struct ctl_table *table, int write, | |||
| 2036 | 2041 | ||
| 2037 | static void validate_coredump_safety(void) | 2042 | static void validate_coredump_safety(void) |
| 2038 | { | 2043 | { |
| 2044 | #ifdef CONFIG_COREDUMP | ||
| 2039 | if (suid_dumpable == SUID_DUMPABLE_SAFE && | 2045 | if (suid_dumpable == SUID_DUMPABLE_SAFE && |
| 2040 | core_pattern[0] != '/' && core_pattern[0] != '|') { | 2046 | core_pattern[0] != '/' && core_pattern[0] != '|') { |
| 2041 | printk(KERN_WARNING "Unsafe core_pattern used with "\ | 2047 | printk(KERN_WARNING "Unsafe core_pattern used with "\ |
| 2042 | "suid_dumpable=2. Pipe handler or fully qualified "\ | 2048 | "suid_dumpable=2. Pipe handler or fully qualified "\ |
| 2043 | "core dump path required.\n"); | 2049 | "core dump path required.\n"); |
| 2044 | } | 2050 | } |
| 2051 | #endif | ||
| 2045 | } | 2052 | } |
| 2046 | 2053 | ||
| 2047 | static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, | 2054 | static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, |
| @@ -2053,6 +2060,7 @@ static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, | |||
| 2053 | return error; | 2060 | return error; |
| 2054 | } | 2061 | } |
| 2055 | 2062 | ||
| 2063 | #ifdef CONFIG_COREDUMP | ||
| 2056 | static int proc_dostring_coredump(struct ctl_table *table, int write, | 2064 | static int proc_dostring_coredump(struct ctl_table *table, int write, |
| 2057 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2065 | void __user *buffer, size_t *lenp, loff_t *ppos) |
| 2058 | { | 2066 | { |
| @@ -2061,6 +2069,7 @@ static int proc_dostring_coredump(struct ctl_table *table, int write, | |||
| 2061 | validate_coredump_safety(); | 2069 | validate_coredump_safety(); |
| 2062 | return error; | 2070 | return error; |
| 2063 | } | 2071 | } |
| 2072 | #endif | ||
| 2064 | 2073 | ||
| 2065 | static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write, | 2074 | static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write, |
| 2066 | void __user *buffer, | 2075 | void __user *buffer, |
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index d0a32796550f..145bb4d3bd4d 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | #include <linux/cgroup.h> | 27 | #include <linux/cgroup.h> |
| 28 | #include <linux/fs.h> | 28 | #include <linux/fs.h> |
| 29 | #include <linux/file.h> | 29 | #include <linux/file.h> |
| 30 | #include <linux/pid_namespace.h> | ||
| 30 | #include <net/genetlink.h> | 31 | #include <net/genetlink.h> |
| 31 | #include <linux/atomic.h> | 32 | #include <linux/atomic.h> |
| 32 | 33 | ||
| @@ -174,7 +175,9 @@ static void send_cpu_listeners(struct sk_buff *skb, | |||
| 174 | up_write(&listeners->sem); | 175 | up_write(&listeners->sem); |
| 175 | } | 176 | } |
| 176 | 177 | ||
| 177 | static void fill_stats(struct task_struct *tsk, struct taskstats *stats) | 178 | static void fill_stats(struct user_namespace *user_ns, |
| 179 | struct pid_namespace *pid_ns, | ||
| 180 | struct task_struct *tsk, struct taskstats *stats) | ||
| 178 | { | 181 | { |
| 179 | memset(stats, 0, sizeof(*stats)); | 182 | memset(stats, 0, sizeof(*stats)); |
| 180 | /* | 183 | /* |
| @@ -190,7 +193,7 @@ static void fill_stats(struct task_struct *tsk, struct taskstats *stats) | |||
| 190 | stats->version = TASKSTATS_VERSION; | 193 | stats->version = TASKSTATS_VERSION; |
| 191 | stats->nvcsw = tsk->nvcsw; | 194 | stats->nvcsw = tsk->nvcsw; |
| 192 | stats->nivcsw = tsk->nivcsw; | 195 | stats->nivcsw = tsk->nivcsw; |
| 193 | bacct_add_tsk(stats, tsk); | 196 | bacct_add_tsk(user_ns, pid_ns, stats, tsk); |
| 194 | 197 | ||
| 195 | /* fill in extended acct fields */ | 198 | /* fill in extended acct fields */ |
| 196 | xacct_add_tsk(stats, tsk); | 199 | xacct_add_tsk(stats, tsk); |
| @@ -207,7 +210,7 @@ static int fill_stats_for_pid(pid_t pid, struct taskstats *stats) | |||
| 207 | rcu_read_unlock(); | 210 | rcu_read_unlock(); |
| 208 | if (!tsk) | 211 | if (!tsk) |
| 209 | return -ESRCH; | 212 | return -ESRCH; |
| 210 | fill_stats(tsk, stats); | 213 | fill_stats(current_user_ns(), task_active_pid_ns(current), tsk, stats); |
| 211 | put_task_struct(tsk); | 214 | put_task_struct(tsk); |
| 212 | return 0; | 215 | return 0; |
| 213 | } | 216 | } |
| @@ -291,6 +294,12 @@ static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd) | |||
| 291 | if (!cpumask_subset(mask, cpu_possible_mask)) | 294 | if (!cpumask_subset(mask, cpu_possible_mask)) |
| 292 | return -EINVAL; | 295 | return -EINVAL; |
| 293 | 296 | ||
| 297 | if (current_user_ns() != &init_user_ns) | ||
| 298 | return -EINVAL; | ||
| 299 | |||
| 300 | if (task_active_pid_ns(current) != &init_pid_ns) | ||
| 301 | return -EINVAL; | ||
| 302 | |||
| 294 | if (isadd == REGISTER) { | 303 | if (isadd == REGISTER) { |
| 295 | for_each_cpu(cpu, mask) { | 304 | for_each_cpu(cpu, mask) { |
| 296 | s = kmalloc_node(sizeof(struct listener), | 305 | s = kmalloc_node(sizeof(struct listener), |
| @@ -415,16 +424,15 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) | |||
| 415 | struct nlattr *na; | 424 | struct nlattr *na; |
| 416 | size_t size; | 425 | size_t size; |
| 417 | u32 fd; | 426 | u32 fd; |
| 418 | struct file *file; | 427 | struct fd f; |
| 419 | int fput_needed; | ||
| 420 | 428 | ||
| 421 | na = info->attrs[CGROUPSTATS_CMD_ATTR_FD]; | 429 | na = info->attrs[CGROUPSTATS_CMD_ATTR_FD]; |
| 422 | if (!na) | 430 | if (!na) |
| 423 | return -EINVAL; | 431 | return -EINVAL; |
| 424 | 432 | ||
| 425 | fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]); | 433 | fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]); |
| 426 | file = fget_light(fd, &fput_needed); | 434 | f = fdget(fd); |
| 427 | if (!file) | 435 | if (!f.file) |
| 428 | return 0; | 436 | return 0; |
| 429 | 437 | ||
| 430 | size = nla_total_size(sizeof(struct cgroupstats)); | 438 | size = nla_total_size(sizeof(struct cgroupstats)); |
| @@ -437,6 +445,7 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) | |||
| 437 | na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS, | 445 | na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS, |
| 438 | sizeof(struct cgroupstats)); | 446 | sizeof(struct cgroupstats)); |
| 439 | if (na == NULL) { | 447 | if (na == NULL) { |
| 448 | nlmsg_free(rep_skb); | ||
| 440 | rc = -EMSGSIZE; | 449 | rc = -EMSGSIZE; |
| 441 | goto err; | 450 | goto err; |
| 442 | } | 451 | } |
| @@ -444,7 +453,7 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) | |||
| 444 | stats = nla_data(na); | 453 | stats = nla_data(na); |
| 445 | memset(stats, 0, sizeof(*stats)); | 454 | memset(stats, 0, sizeof(*stats)); |
| 446 | 455 | ||
| 447 | rc = cgroupstats_build(stats, file->f_dentry); | 456 | rc = cgroupstats_build(stats, f.file->f_dentry); |
| 448 | if (rc < 0) { | 457 | if (rc < 0) { |
| 449 | nlmsg_free(rep_skb); | 458 | nlmsg_free(rep_skb); |
| 450 | goto err; | 459 | goto err; |
| @@ -453,7 +462,7 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) | |||
| 453 | rc = send_reply(rep_skb, info); | 462 | rc = send_reply(rep_skb, info); |
| 454 | 463 | ||
| 455 | err: | 464 | err: |
| 456 | fput_light(file, fput_needed); | 465 | fdput(f); |
| 457 | return rc; | 466 | return rc; |
| 458 | } | 467 | } |
| 459 | 468 | ||
| @@ -467,7 +476,7 @@ static int cmd_attr_register_cpumask(struct genl_info *info) | |||
| 467 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask); | 476 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask); |
| 468 | if (rc < 0) | 477 | if (rc < 0) |
| 469 | goto out; | 478 | goto out; |
| 470 | rc = add_del_listener(info->snd_pid, mask, REGISTER); | 479 | rc = add_del_listener(info->snd_portid, mask, REGISTER); |
| 471 | out: | 480 | out: |
| 472 | free_cpumask_var(mask); | 481 | free_cpumask_var(mask); |
| 473 | return rc; | 482 | return rc; |
| @@ -483,7 +492,7 @@ static int cmd_attr_deregister_cpumask(struct genl_info *info) | |||
| 483 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask); | 492 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask); |
| 484 | if (rc < 0) | 493 | if (rc < 0) |
| 485 | goto out; | 494 | goto out; |
| 486 | rc = add_del_listener(info->snd_pid, mask, DEREGISTER); | 495 | rc = add_del_listener(info->snd_portid, mask, DEREGISTER); |
| 487 | out: | 496 | out: |
| 488 | free_cpumask_var(mask); | 497 | free_cpumask_var(mask); |
| 489 | return rc; | 498 | return rc; |
| @@ -631,11 +640,12 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) | |||
| 631 | if (rc < 0) | 640 | if (rc < 0) |
| 632 | return; | 641 | return; |
| 633 | 642 | ||
| 634 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, tsk->pid); | 643 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, |
| 644 | task_pid_nr_ns(tsk, &init_pid_ns)); | ||
| 635 | if (!stats) | 645 | if (!stats) |
| 636 | goto err; | 646 | goto err; |
| 637 | 647 | ||
| 638 | fill_stats(tsk, stats); | 648 | fill_stats(&init_user_ns, &init_pid_ns, tsk, stats); |
| 639 | 649 | ||
| 640 | /* | 650 | /* |
| 641 | * Doesn't matter if tsk is the leader or the last group member leaving | 651 | * Doesn't matter if tsk is the leader or the last group member leaving |
| @@ -643,7 +653,8 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) | |||
| 643 | if (!is_thread_group || !group_dead) | 653 | if (!is_thread_group || !group_dead) |
| 644 | goto send; | 654 | goto send; |
| 645 | 655 | ||
| 646 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tsk->tgid); | 656 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, |
| 657 | task_tgid_nr_ns(tsk, &init_pid_ns)); | ||
| 647 | if (!stats) | 658 | if (!stats) |
| 648 | goto err; | 659 | goto err; |
| 649 | 660 | ||
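Editor's note: fill_stats() now carries the observer's user and pid namespaces so the uid and pid values reported through taskstats are translated into the requester's view, while the exit path deliberately pins both to the initial namespaces. Roughly, the translation looks like the fragment below (kernel context assumed, includes omitted; fill_ids() is an illustrative name, not from the patch):

static void fill_ids(struct task_struct *tsk, struct taskstats *stats)
{
    /* pid as seen from the caller's pid namespace */
    stats->ac_pid = task_pid_nr_ns(tsk, task_active_pid_ns(current));
    /* kuid_t mapped into the caller's user namespace for reporting */
    stats->ac_uid = from_kuid_munged(current_user_ns(), task_uid(tsk));
}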
diff --git a/kernel/time.c b/kernel/time.c index ba744cf80696..d226c6a3fd28 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
| @@ -30,7 +30,7 @@ | |||
| 30 | #include <linux/export.h> | 30 | #include <linux/export.h> |
| 31 | #include <linux/timex.h> | 31 | #include <linux/timex.h> |
| 32 | #include <linux/capability.h> | 32 | #include <linux/capability.h> |
| 33 | #include <linux/clocksource.h> | 33 | #include <linux/timekeeper_internal.h> |
| 34 | #include <linux/errno.h> | 34 | #include <linux/errno.h> |
| 35 | #include <linux/syscalls.h> | 35 | #include <linux/syscalls.h> |
| 36 | #include <linux/security.h> | 36 | #include <linux/security.h> |
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index fd42bd452b75..8601f0db1261 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig | |||
| @@ -16,6 +16,10 @@ config ARCH_CLOCKSOURCE_DATA | |||
| 16 | config GENERIC_TIME_VSYSCALL | 16 | config GENERIC_TIME_VSYSCALL |
| 17 | bool | 17 | bool |
| 18 | 18 | ||
| 19 | # Timekeeping vsyscall support | ||
| 20 | config GENERIC_TIME_VSYSCALL_OLD | ||
| 21 | bool | ||
| 22 | |||
| 19 | # ktime_t scalar 64bit nsec representation | 23 | # ktime_t scalar 64bit nsec representation |
| 20 | config KTIME_SCALAR | 24 | config KTIME_SCALAR |
| 21 | bool | 25 | bool |
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index aa27d391bfc8..f11d83b12949 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c | |||
| @@ -37,7 +37,6 @@ | |||
| 37 | static struct alarm_base { | 37 | static struct alarm_base { |
| 38 | spinlock_t lock; | 38 | spinlock_t lock; |
| 39 | struct timerqueue_head timerqueue; | 39 | struct timerqueue_head timerqueue; |
| 40 | struct hrtimer timer; | ||
| 41 | ktime_t (*gettime)(void); | 40 | ktime_t (*gettime)(void); |
| 42 | clockid_t base_clockid; | 41 | clockid_t base_clockid; |
| 43 | } alarm_bases[ALARM_NUMTYPE]; | 42 | } alarm_bases[ALARM_NUMTYPE]; |
| @@ -46,6 +45,8 @@ static struct alarm_base { | |||
| 46 | static ktime_t freezer_delta; | 45 | static ktime_t freezer_delta; |
| 47 | static DEFINE_SPINLOCK(freezer_delta_lock); | 46 | static DEFINE_SPINLOCK(freezer_delta_lock); |
| 48 | 47 | ||
| 48 | static struct wakeup_source *ws; | ||
| 49 | |||
| 49 | #ifdef CONFIG_RTC_CLASS | 50 | #ifdef CONFIG_RTC_CLASS |
| 50 | /* rtc timer and device for setting alarm wakeups at suspend */ | 51 | /* rtc timer and device for setting alarm wakeups at suspend */ |
| 51 | static struct rtc_timer rtctimer; | 52 | static struct rtc_timer rtctimer; |
| @@ -130,50 +131,35 @@ static inline void alarmtimer_rtc_timer_init(void) { } | |||
| 130 | * @base: pointer to the base where the timer is being run | 131 | * @base: pointer to the base where the timer is being run |
| 131 | * @alarm: pointer to alarm being enqueued. | 132 | * @alarm: pointer to alarm being enqueued. |
| 132 | * | 133 | * |
| 133 | * Adds alarm to an alarm_base timerqueue and if necessary sets | 134 | * Adds the alarm to an alarm_base timerqueue |
| 134 | * an hrtimer to run. | ||
| 135 | * | 135 | * |
| 136 | * Must hold base->lock when calling. | 136 | * Must hold base->lock when calling. |
| 137 | */ | 137 | */ |
| 138 | static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm) | 138 | static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm) |
| 139 | { | 139 | { |
| 140 | if (alarm->state & ALARMTIMER_STATE_ENQUEUED) | ||
| 141 | timerqueue_del(&base->timerqueue, &alarm->node); | ||
| 142 | |||
| 140 | timerqueue_add(&base->timerqueue, &alarm->node); | 143 | timerqueue_add(&base->timerqueue, &alarm->node); |
| 141 | alarm->state |= ALARMTIMER_STATE_ENQUEUED; | 144 | alarm->state |= ALARMTIMER_STATE_ENQUEUED; |
| 142 | |||
| 143 | if (&alarm->node == timerqueue_getnext(&base->timerqueue)) { | ||
| 144 | hrtimer_try_to_cancel(&base->timer); | ||
| 145 | hrtimer_start(&base->timer, alarm->node.expires, | ||
| 146 | HRTIMER_MODE_ABS); | ||
| 147 | } | ||
| 148 | } | 145 | } |
| 149 | 146 | ||
| 150 | /** | 147 | /** |
| 151 | * alarmtimer_remove - Removes an alarm timer from an alarm_base timerqueue | 148 | * alarmtimer_dequeue - Removes an alarm timer from an alarm_base timerqueue |
| 152 | * @base: pointer to the base where the timer is running | 149 | * @base: pointer to the base where the timer is running |
| 153 | * @alarm: pointer to alarm being removed | 150 | * @alarm: pointer to alarm being removed |
| 154 | * | 151 | * |
| 155 | * Removes alarm from an alarm_base timerqueue and if necessary sets | 152 | * Removes the alarm from an alarm_base timerqueue |
| 156 | * a new timer to run. | ||
| 157 | * | 153 | * |
| 158 | * Must hold base->lock when calling. | 154 | * Must hold base->lock when calling. |
| 159 | */ | 155 | */ |
| 160 | static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm) | 156 | static void alarmtimer_dequeue(struct alarm_base *base, struct alarm *alarm) |
| 161 | { | 157 | { |
| 162 | struct timerqueue_node *next = timerqueue_getnext(&base->timerqueue); | ||
| 163 | |||
| 164 | if (!(alarm->state & ALARMTIMER_STATE_ENQUEUED)) | 158 | if (!(alarm->state & ALARMTIMER_STATE_ENQUEUED)) |
| 165 | return; | 159 | return; |
| 166 | 160 | ||
| 167 | timerqueue_del(&base->timerqueue, &alarm->node); | 161 | timerqueue_del(&base->timerqueue, &alarm->node); |
| 168 | alarm->state &= ~ALARMTIMER_STATE_ENQUEUED; | 162 | alarm->state &= ~ALARMTIMER_STATE_ENQUEUED; |
| 169 | |||
| 170 | if (next == &alarm->node) { | ||
| 171 | hrtimer_try_to_cancel(&base->timer); | ||
| 172 | next = timerqueue_getnext(&base->timerqueue); | ||
| 173 | if (!next) | ||
| 174 | return; | ||
| 175 | hrtimer_start(&base->timer, next->expires, HRTIMER_MODE_ABS); | ||
| 176 | } | ||
| 177 | } | 163 | } |
| 178 | 164 | ||
| 179 | 165 | ||
| @@ -188,42 +174,23 @@ static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm) | |||
| 188 | */ | 174 | */ |
| 189 | static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) | 175 | static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) |
| 190 | { | 176 | { |
| 191 | struct alarm_base *base = container_of(timer, struct alarm_base, timer); | 177 | struct alarm *alarm = container_of(timer, struct alarm, timer); |
| 192 | struct timerqueue_node *next; | 178 | struct alarm_base *base = &alarm_bases[alarm->type]; |
| 193 | unsigned long flags; | 179 | unsigned long flags; |
| 194 | ktime_t now; | ||
| 195 | int ret = HRTIMER_NORESTART; | 180 | int ret = HRTIMER_NORESTART; |
| 196 | int restart = ALARMTIMER_NORESTART; | 181 | int restart = ALARMTIMER_NORESTART; |
| 197 | 182 | ||
| 198 | spin_lock_irqsave(&base->lock, flags); | 183 | spin_lock_irqsave(&base->lock, flags); |
| 199 | now = base->gettime(); | 184 | alarmtimer_dequeue(base, alarm); |
| 200 | while ((next = timerqueue_getnext(&base->timerqueue))) { | 185 | spin_unlock_irqrestore(&base->lock, flags); |
| 201 | struct alarm *alarm; | ||
| 202 | ktime_t expired = next->expires; | ||
| 203 | |||
| 204 | if (expired.tv64 > now.tv64) | ||
| 205 | break; | ||
| 206 | |||
| 207 | alarm = container_of(next, struct alarm, node); | ||
| 208 | |||
| 209 | timerqueue_del(&base->timerqueue, &alarm->node); | ||
| 210 | alarm->state &= ~ALARMTIMER_STATE_ENQUEUED; | ||
| 211 | |||
| 212 | alarm->state |= ALARMTIMER_STATE_CALLBACK; | ||
| 213 | spin_unlock_irqrestore(&base->lock, flags); | ||
| 214 | if (alarm->function) | ||
| 215 | restart = alarm->function(alarm, now); | ||
| 216 | spin_lock_irqsave(&base->lock, flags); | ||
| 217 | alarm->state &= ~ALARMTIMER_STATE_CALLBACK; | ||
| 218 | 186 | ||
| 219 | if (restart != ALARMTIMER_NORESTART) { | 187 | if (alarm->function) |
| 220 | timerqueue_add(&base->timerqueue, &alarm->node); | 188 | restart = alarm->function(alarm, base->gettime()); |
| 221 | alarm->state |= ALARMTIMER_STATE_ENQUEUED; | ||
| 222 | } | ||
| 223 | } | ||
| 224 | 189 | ||
| 225 | if (next) { | 190 | spin_lock_irqsave(&base->lock, flags); |
| 226 | hrtimer_set_expires(&base->timer, next->expires); | 191 | if (restart != ALARMTIMER_NORESTART) { |
| 192 | hrtimer_set_expires(&alarm->timer, alarm->node.expires); | ||
| 193 | alarmtimer_enqueue(base, alarm); | ||
| 227 | ret = HRTIMER_RESTART; | 194 | ret = HRTIMER_RESTART; |
| 228 | } | 195 | } |
| 229 | spin_unlock_irqrestore(&base->lock, flags); | 196 | spin_unlock_irqrestore(&base->lock, flags); |
| @@ -250,6 +217,7 @@ static int alarmtimer_suspend(struct device *dev) | |||
| 250 | unsigned long flags; | 217 | unsigned long flags; |
| 251 | struct rtc_device *rtc; | 218 | struct rtc_device *rtc; |
| 252 | int i; | 219 | int i; |
| 220 | int ret; | ||
| 253 | 221 | ||
| 254 | spin_lock_irqsave(&freezer_delta_lock, flags); | 222 | spin_lock_irqsave(&freezer_delta_lock, flags); |
| 255 | min = freezer_delta; | 223 | min = freezer_delta; |
| @@ -279,8 +247,10 @@ static int alarmtimer_suspend(struct device *dev) | |||
| 279 | if (min.tv64 == 0) | 247 | if (min.tv64 == 0) |
| 280 | return 0; | 248 | return 0; |
| 281 | 249 | ||
| 282 | /* XXX - Should we enforce a minimum sleep time? */ | 250 | if (ktime_to_ns(min) < 2 * NSEC_PER_SEC) { |
| 283 | WARN_ON(min.tv64 < NSEC_PER_SEC); | 251 | __pm_wakeup_event(ws, 2 * MSEC_PER_SEC); |
| 252 | return -EBUSY; | ||
| 253 | } | ||
| 284 | 254 | ||
| 285 | /* Setup an rtc timer to fire that far in the future */ | 255 | /* Setup an rtc timer to fire that far in the future */ |
| 286 | rtc_timer_cancel(rtc, &rtctimer); | 256 | rtc_timer_cancel(rtc, &rtctimer); |
| @@ -288,9 +258,11 @@ static int alarmtimer_suspend(struct device *dev) | |||
| 288 | now = rtc_tm_to_ktime(tm); | 258 | now = rtc_tm_to_ktime(tm); |
| 289 | now = ktime_add(now, min); | 259 | now = ktime_add(now, min); |
| 290 | 260 | ||
| 291 | rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0)); | 261 | /* Set alarm; if it lies in the past, briefly reject suspend to handle it */ |
| 292 | 262 | ret = rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0)); | |
| 293 | return 0; | 263 | if (ret < 0) |
| 264 | __pm_wakeup_event(ws, MSEC_PER_SEC); | ||
| 265 | return ret; | ||
| 294 | } | 266 | } |
| 295 | #else | 267 | #else |
| 296 | static int alarmtimer_suspend(struct device *dev) | 268 | static int alarmtimer_suspend(struct device *dev) |
| @@ -324,6 +296,9 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type, | |||
| 324 | enum alarmtimer_restart (*function)(struct alarm *, ktime_t)) | 296 | enum alarmtimer_restart (*function)(struct alarm *, ktime_t)) |
| 325 | { | 297 | { |
| 326 | timerqueue_init(&alarm->node); | 298 | timerqueue_init(&alarm->node); |
| 299 | hrtimer_init(&alarm->timer, alarm_bases[type].base_clockid, | ||
| 300 | HRTIMER_MODE_ABS); | ||
| 301 | alarm->timer.function = alarmtimer_fired; | ||
| 327 | alarm->function = function; | 302 | alarm->function = function; |
| 328 | alarm->type = type; | 303 | alarm->type = type; |
| 329 | alarm->state = ALARMTIMER_STATE_INACTIVE; | 304 | alarm->state = ALARMTIMER_STATE_INACTIVE; |
| @@ -334,17 +309,19 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type, | |||
| 334 | * @alarm: ptr to alarm to set | 309 | * @alarm: ptr to alarm to set |
| 335 | * @start: time to run the alarm | 310 | * @start: time to run the alarm |
| 336 | */ | 311 | */ |
| 337 | void alarm_start(struct alarm *alarm, ktime_t start) | 312 | int alarm_start(struct alarm *alarm, ktime_t start) |
| 338 | { | 313 | { |
| 339 | struct alarm_base *base = &alarm_bases[alarm->type]; | 314 | struct alarm_base *base = &alarm_bases[alarm->type]; |
| 340 | unsigned long flags; | 315 | unsigned long flags; |
| 316 | int ret; | ||
| 341 | 317 | ||
| 342 | spin_lock_irqsave(&base->lock, flags); | 318 | spin_lock_irqsave(&base->lock, flags); |
| 343 | if (alarmtimer_active(alarm)) | ||
| 344 | alarmtimer_remove(base, alarm); | ||
| 345 | alarm->node.expires = start; | 319 | alarm->node.expires = start; |
| 346 | alarmtimer_enqueue(base, alarm); | 320 | alarmtimer_enqueue(base, alarm); |
| 321 | ret = hrtimer_start(&alarm->timer, alarm->node.expires, | ||
| 322 | HRTIMER_MODE_ABS); | ||
| 347 | spin_unlock_irqrestore(&base->lock, flags); | 323 | spin_unlock_irqrestore(&base->lock, flags); |
| 324 | return ret; | ||
| 348 | } | 325 | } |
| 349 | 326 | ||
| 350 | /** | 327 | /** |
| @@ -358,18 +335,12 @@ int alarm_try_to_cancel(struct alarm *alarm) | |||
| 358 | { | 335 | { |
| 359 | struct alarm_base *base = &alarm_bases[alarm->type]; | 336 | struct alarm_base *base = &alarm_bases[alarm->type]; |
| 360 | unsigned long flags; | 337 | unsigned long flags; |
| 361 | int ret = -1; | 338 | int ret; |
| 362 | spin_lock_irqsave(&base->lock, flags); | ||
| 363 | |||
| 364 | if (alarmtimer_callback_running(alarm)) | ||
| 365 | goto out; | ||
| 366 | 339 | ||
| 367 | if (alarmtimer_is_queued(alarm)) { | 340 | spin_lock_irqsave(&base->lock, flags); |
| 368 | alarmtimer_remove(base, alarm); | 341 | ret = hrtimer_try_to_cancel(&alarm->timer); |
| 369 | ret = 1; | 342 | if (ret >= 0) |
| 370 | } else | 343 | alarmtimer_dequeue(base, alarm); |
| 371 | ret = 0; | ||
| 372 | out: | ||
| 373 | spin_unlock_irqrestore(&base->lock, flags); | 344 | spin_unlock_irqrestore(&base->lock, flags); |
| 374 | return ret; | 345 | return ret; |
| 375 | } | 346 | } |
| @@ -802,10 +773,6 @@ static int __init alarmtimer_init(void) | |||
| 802 | for (i = 0; i < ALARM_NUMTYPE; i++) { | 773 | for (i = 0; i < ALARM_NUMTYPE; i++) { |
| 803 | timerqueue_init_head(&alarm_bases[i].timerqueue); | 774 | timerqueue_init_head(&alarm_bases[i].timerqueue); |
| 804 | spin_lock_init(&alarm_bases[i].lock); | 775 | spin_lock_init(&alarm_bases[i].lock); |
| 805 | hrtimer_init(&alarm_bases[i].timer, | ||
| 806 | alarm_bases[i].base_clockid, | ||
| 807 | HRTIMER_MODE_ABS); | ||
| 808 | alarm_bases[i].timer.function = alarmtimer_fired; | ||
| 809 | } | 776 | } |
| 810 | 777 | ||
| 811 | error = alarmtimer_rtc_interface_setup(); | 778 | error = alarmtimer_rtc_interface_setup(); |
| @@ -821,6 +788,7 @@ static int __init alarmtimer_init(void) | |||
| 821 | error = PTR_ERR(pdev); | 788 | error = PTR_ERR(pdev); |
| 822 | goto out_drv; | 789 | goto out_drv; |
| 823 | } | 790 | } |
| 791 | ws = wakeup_source_register("alarmtimer"); | ||
| 824 | return 0; | 792 | return 0; |
| 825 | 793 | ||
| 826 | out_drv: | 794 | out_drv: |
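Editor's note: the alarmtimer rework above gives every struct alarm its own hrtimer instead of multiplexing one hrtimer per alarm_base, which removes the expiry loop and the CALLBACK state, and suspend now pokes a wakeup source and returns -EBUSY instead of warning when the next alarm is less than two seconds away. The caller-visible API keeps its shape; a hedged usage sketch (names and the five-second expiry are illustrative, not from the patch):

#include <linux/alarmtimer.h>
#include <linux/ktime.h>

static enum alarmtimer_restart my_alarm_fn(struct alarm *a, ktime_t now)
{
    /* fired with the base clock's current time in 'now'; do the work here */
    return ALARMTIMER_NORESTART;    /* one-shot: do not requeue */
}

static struct alarm my_alarm;

static void arm_in_five_seconds(void)
{
    alarm_init(&my_alarm, ALARM_REALTIME, my_alarm_fn);
    alarm_start(&my_alarm, ktime_add(ktime_get_real(), ktime_set(5, 0)));
}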
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 7e1ce012a851..30b6de0d977c 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c | |||
| @@ -397,6 +397,30 @@ void clockevents_exchange_device(struct clock_event_device *old, | |||
| 397 | local_irq_restore(flags); | 397 | local_irq_restore(flags); |
| 398 | } | 398 | } |
| 399 | 399 | ||
| 400 | /** | ||
| 401 | * clockevents_suspend - suspend clock devices | ||
| 402 | */ | ||
| 403 | void clockevents_suspend(void) | ||
| 404 | { | ||
| 405 | struct clock_event_device *dev; | ||
| 406 | |||
| 407 | list_for_each_entry_reverse(dev, &clockevent_devices, list) | ||
| 408 | if (dev->suspend) | ||
| 409 | dev->suspend(dev); | ||
| 410 | } | ||
| 411 | |||
| 412 | /** | ||
| 413 | * clockevents_resume - resume clock devices | ||
| 414 | */ | ||
| 415 | void clockevents_resume(void) | ||
| 416 | { | ||
| 417 | struct clock_event_device *dev; | ||
| 418 | |||
| 419 | list_for_each_entry(dev, &clockevent_devices, list) | ||
| 420 | if (dev->resume) | ||
| 421 | dev->resume(dev); | ||
| 422 | } | ||
| 423 | |||
| 400 | #ifdef CONFIG_GENERIC_CLOCKEVENTS | 424 | #ifdef CONFIG_GENERIC_CLOCKEVENTS |
| 401 | /** | 425 | /** |
| 402 | * clockevents_notify - notification about relevant events | 426 | * clockevents_notify - notification about relevant events |
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 46da0537c10b..6629bf7b5285 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c | |||
| @@ -37,7 +37,7 @@ | |||
| 37 | * requested HZ value. It is also not recommended | 37 | * requested HZ value. It is also not recommended |
| 38 | * for "tick-less" systems. | 38 | * for "tick-less" systems. |
| 39 | */ | 39 | */ |
| 40 | #define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/SHIFTED_HZ)) | 40 | #define NSEC_PER_JIFFY ((NSEC_PER_SEC+HZ/2)/HZ) |
| 41 | 41 | ||
| 42 | /* Since jiffies uses a simple NSEC_PER_JIFFY multiplier | 42 | /* Since jiffies uses a simple NSEC_PER_JIFFY multiplier |
| 43 | * conversion, the .shift value could be zero. However | 43 | * conversion, the .shift value could be zero. However |
| @@ -95,3 +95,33 @@ struct clocksource * __init __weak clocksource_default_clock(void) | |||
| 95 | { | 95 | { |
| 96 | return &clocksource_jiffies; | 96 | return &clocksource_jiffies; |
| 97 | } | 97 | } |
| 98 | |||
| 99 | struct clocksource refined_jiffies; | ||
| 100 | |||
| 101 | int register_refined_jiffies(long cycles_per_second) | ||
| 102 | { | ||
| 103 | u64 nsec_per_tick, shift_hz; | ||
| 104 | long cycles_per_tick; | ||
| 105 | |||
| 106 | |||
| 107 | |||
| 108 | refined_jiffies = clocksource_jiffies; | ||
| 109 | refined_jiffies.name = "refined-jiffies"; | ||
| 110 | refined_jiffies.rating++; | ||
| 111 | |||
| 112 | /* Calc cycles per tick */ | ||
| 113 | cycles_per_tick = (cycles_per_second + HZ/2)/HZ; | ||
| 114 | /* shift_hz stores hz<<8 for extra accuracy */ | ||
| 115 | shift_hz = (u64)cycles_per_second << 8; | ||
| 116 | shift_hz += cycles_per_tick/2; | ||
| 117 | do_div(shift_hz, cycles_per_tick); | ||
| 118 | /* Calculate nsec_per_tick using shift_hz */ | ||
| 119 | nsec_per_tick = (u64)NSEC_PER_SEC << 8; | ||
| 120 | nsec_per_tick += (u32)shift_hz/2; | ||
| 121 | do_div(nsec_per_tick, (u32)shift_hz); | ||
| 122 | |||
| 123 | refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT; | ||
| 124 | |||
| 125 | clocksource_register(&refined_jiffies); | ||
| 126 | return 0; | ||
| 127 | } | ||
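Editor's note: register_refined_jiffies() builds a slightly better-rated copy of the jiffies clocksource whose mult reflects the measured tick length rather than the ideal NSEC_PER_SEC/HZ. The stand-alone program below walks the same arithmetic for a hypothetical PC-style PIT at 1193182 Hz with HZ=1000; both input values are assumptions chosen purely for illustration.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    const uint64_t cycles_per_second = 1193182;  /* assumed PIT rate */
    const uint64_t hz = 1000;                    /* assumed HZ */
    uint64_t cycles_per_tick, shift_hz, nsec_per_tick;

    cycles_per_tick = (cycles_per_second + hz / 2) / hz;    /* 1193 */

    /* shift_hz holds the real tick rate, hz << 8, for extra accuracy */
    shift_hz = (cycles_per_second << 8) + cycles_per_tick / 2;
    shift_hz /= cycles_per_tick;                 /* 256039, i.e. ~1000.15 << 8 */

    nsec_per_tick = ((uint64_t)1000000000 << 8) + shift_hz / 2;
    nsec_per_tick /= shift_hz;                   /* 999848 ns per tick */

    printf("cycles_per_tick=%llu shift_hz=%llu nsec_per_tick=%llu\n",
           (unsigned long long)cycles_per_tick,
           (unsigned long long)shift_hz,
           (unsigned long long)nsec_per_tick);
    /* versus the coarse NSEC_PER_JIFFY = (10^9 + HZ/2)/HZ = 1000000 ns */
    return 0;
}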
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f423bdd035c2..a40260885265 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
| @@ -835,7 +835,7 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) | |||
| 835 | */ | 835 | */ |
| 836 | if (ts->tick_stopped) { | 836 | if (ts->tick_stopped) { |
| 837 | touch_softlockup_watchdog(); | 837 | touch_softlockup_watchdog(); |
| 838 | if (idle_cpu(cpu)) | 838 | if (is_idle_task(current)) |
| 839 | ts->idle_jiffies++; | 839 | ts->idle_jiffies++; |
| 840 | } | 840 | } |
| 841 | update_process_times(user_mode(regs)); | 841 | update_process_times(user_mode(regs)); |
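Editor's note: the switch from idle_cpu(cpu) to is_idle_task(current) matters for the idle_jiffies accounting because idle_cpu() also requires the runqueue to be empty, so a tick interrupting the idle loop just after a task has been woken was not being counted. The contrast below is my paraphrase of the era's helpers; treat it as an assumption, not a quote from the tree.

/* kernel-context sketch, includes omitted */
static inline bool is_idle_task_sketch(const struct task_struct *p)
{
    return p->pid == 0;   /* true whenever 'p' is the per-cpu idle thread */
}
/* idle_cpu(cpu), by contrast, additionally checks that nothing is runnable
 * on that cpu, which is stricter than "the tick landed in the idle loop". */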
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index d3b91e75cecd..e424970bb562 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | * | 8 | * |
| 9 | */ | 9 | */ |
| 10 | 10 | ||
| 11 | #include <linux/timekeeper_internal.h> | ||
| 11 | #include <linux/module.h> | 12 | #include <linux/module.h> |
| 12 | #include <linux/interrupt.h> | 13 | #include <linux/interrupt.h> |
| 13 | #include <linux/percpu.h> | 14 | #include <linux/percpu.h> |
| @@ -21,61 +22,6 @@ | |||
| 21 | #include <linux/tick.h> | 22 | #include <linux/tick.h> |
| 22 | #include <linux/stop_machine.h> | 23 | #include <linux/stop_machine.h> |
| 23 | 24 | ||
| 24 | /* Structure holding internal timekeeping values. */ | ||
| 25 | struct timekeeper { | ||
| 26 | /* Current clocksource used for timekeeping. */ | ||
| 27 | struct clocksource *clock; | ||
| 28 | /* NTP adjusted clock multiplier */ | ||
| 29 | u32 mult; | ||
| 30 | /* The shift value of the current clocksource. */ | ||
| 31 | u32 shift; | ||
| 32 | /* Number of clock cycles in one NTP interval. */ | ||
| 33 | cycle_t cycle_interval; | ||
| 34 | /* Number of clock shifted nano seconds in one NTP interval. */ | ||
| 35 | u64 xtime_interval; | ||
| 36 | /* shifted nano seconds left over when rounding cycle_interval */ | ||
| 37 | s64 xtime_remainder; | ||
| 38 | /* Raw nano seconds accumulated per NTP interval. */ | ||
| 39 | u32 raw_interval; | ||
| 40 | |||
| 41 | /* Current CLOCK_REALTIME time in seconds */ | ||
| 42 | u64 xtime_sec; | ||
| 43 | /* Clock shifted nano seconds */ | ||
| 44 | u64 xtime_nsec; | ||
| 45 | |||
| 46 | /* Difference between accumulated time and NTP time in ntp | ||
| 47 | * shifted nano seconds. */ | ||
| 48 | s64 ntp_error; | ||
| 49 | /* Shift conversion between clock shifted nano seconds and | ||
| 50 | * ntp shifted nano seconds. */ | ||
| 51 | u32 ntp_error_shift; | ||
| 52 | |||
| 53 | /* | ||
| 54 | * wall_to_monotonic is what we need to add to xtime (or xtime corrected | ||
| 55 | * for sub jiffie times) to get to monotonic time. Monotonic is pegged | ||
| 56 | * at zero at system boot time, so wall_to_monotonic will be negative, | ||
| 57 | * however, we will ALWAYS keep the tv_nsec part positive so we can use | ||
| 58 | * the usual normalization. | ||
| 59 | * | ||
| 60 | * wall_to_monotonic is moved after resume from suspend for the | ||
| 61 | * monotonic time not to jump. We need to add total_sleep_time to | ||
| 62 | * wall_to_monotonic to get the real boot based time offset. | ||
| 63 | * | ||
| 64 | * - wall_to_monotonic is no longer the boot time, getboottime must be | ||
| 65 | * used instead. | ||
| 66 | */ | ||
| 67 | struct timespec wall_to_monotonic; | ||
| 68 | /* Offset clock monotonic -> clock realtime */ | ||
| 69 | ktime_t offs_real; | ||
| 70 | /* time spent in suspend */ | ||
| 71 | struct timespec total_sleep_time; | ||
| 72 | /* Offset clock monotonic -> clock boottime */ | ||
| 73 | ktime_t offs_boot; | ||
| 74 | /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */ | ||
| 75 | struct timespec raw_time; | ||
| 76 | /* Seqlock for all timekeeper values */ | ||
| 77 | seqlock_t lock; | ||
| 78 | }; | ||
| 79 | 25 | ||
| 80 | static struct timekeeper timekeeper; | 26 | static struct timekeeper timekeeper; |
| 81 | 27 | ||
| @@ -96,15 +42,6 @@ static inline void tk_normalize_xtime(struct timekeeper *tk) | |||
| 96 | } | 42 | } |
| 97 | } | 43 | } |
| 98 | 44 | ||
| 99 | static struct timespec tk_xtime(struct timekeeper *tk) | ||
| 100 | { | ||
| 101 | struct timespec ts; | ||
| 102 | |||
| 103 | ts.tv_sec = tk->xtime_sec; | ||
| 104 | ts.tv_nsec = (long)(tk->xtime_nsec >> tk->shift); | ||
| 105 | return ts; | ||
| 106 | } | ||
| 107 | |||
| 108 | static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts) | 45 | static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts) |
| 109 | { | 46 | { |
| 110 | tk->xtime_sec = ts->tv_sec; | 47 | tk->xtime_sec = ts->tv_sec; |
| @@ -246,14 +183,11 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) | |||
| 246 | /* must hold write on timekeeper.lock */ | 183 | /* must hold write on timekeeper.lock */ |
| 247 | static void timekeeping_update(struct timekeeper *tk, bool clearntp) | 184 | static void timekeeping_update(struct timekeeper *tk, bool clearntp) |
| 248 | { | 185 | { |
| 249 | struct timespec xt; | ||
| 250 | |||
| 251 | if (clearntp) { | 186 | if (clearntp) { |
| 252 | tk->ntp_error = 0; | 187 | tk->ntp_error = 0; |
| 253 | ntp_clear(); | 188 | ntp_clear(); |
| 254 | } | 189 | } |
| 255 | xt = tk_xtime(tk); | 190 | update_vsyscall(tk); |
| 256 | update_vsyscall(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult); | ||
| 257 | } | 191 | } |
| 258 | 192 | ||
| 259 | /** | 193 | /** |
| @@ -776,6 +710,7 @@ static void timekeeping_resume(void) | |||
| 776 | 710 | ||
| 777 | read_persistent_clock(&ts); | 711 | read_persistent_clock(&ts); |
| 778 | 712 | ||
| 713 | clockevents_resume(); | ||
| 779 | clocksource_resume(); | 714 | clocksource_resume(); |
| 780 | 715 | ||
| 781 | write_seqlock_irqsave(&tk->lock, flags); | 716 | write_seqlock_irqsave(&tk->lock, flags); |
| @@ -835,6 +770,7 @@ static int timekeeping_suspend(void) | |||
| 835 | 770 | ||
| 836 | clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); | 771 | clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); |
| 837 | clocksource_suspend(); | 772 | clocksource_suspend(); |
| 773 | clockevents_suspend(); | ||
| 838 | 774 | ||
| 839 | return 0; | 775 | return 0; |
| 840 | } | 776 | } |
| @@ -1111,7 +1047,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, | |||
| 1111 | accumulate_nsecs_to_secs(tk); | 1047 | accumulate_nsecs_to_secs(tk); |
| 1112 | 1048 | ||
| 1113 | /* Accumulate raw time */ | 1049 | /* Accumulate raw time */ |
| 1114 | raw_nsecs = tk->raw_interval << shift; | 1050 | raw_nsecs = (u64)tk->raw_interval << shift; |
| 1115 | raw_nsecs += tk->raw_time.tv_nsec; | 1051 | raw_nsecs += tk->raw_time.tv_nsec; |
| 1116 | if (raw_nsecs >= NSEC_PER_SEC) { | 1052 | if (raw_nsecs >= NSEC_PER_SEC) { |
| 1117 | u64 raw_secs = raw_nsecs; | 1053 | u64 raw_secs = raw_nsecs; |
| @@ -1128,6 +1064,33 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, | |||
| 1128 | return offset; | 1064 | return offset; |
| 1129 | } | 1065 | } |
| 1130 | 1066 | ||
| 1067 | #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD | ||
| 1068 | static inline void old_vsyscall_fixup(struct timekeeper *tk) | ||
| 1069 | { | ||
| 1070 | s64 remainder; | ||
| 1071 | |||
| 1072 | /* | ||
| 1073 | * Store only full nanoseconds into xtime_nsec after rounding | ||
| 1074 | * it up and add the remainder to the error difference. | ||
| 1075 | * XXX - This is necessary to avoid small 1ns inconsistencies caused | ||
| 1076 | * by truncating the remainder in vsyscalls. However, it causes | ||
| 1077 | * additional work to be done in timekeeping_adjust(). Once | ||
| 1078 | * the vsyscall implementations are converted to use xtime_nsec | ||
| 1079 | * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD | ||
| 1080 | * users are removed, this can be killed. | ||
| 1081 | */ | ||
| 1082 | remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1); | ||
| 1083 | tk->xtime_nsec -= remainder; | ||
| 1084 | tk->xtime_nsec += 1ULL << tk->shift; | ||
| 1085 | tk->ntp_error += remainder << tk->ntp_error_shift; | ||
| 1086 | |||
| 1087 | } | ||
| 1088 | #else | ||
| 1089 | #define old_vsyscall_fixup(tk) | ||
| 1090 | #endif | ||
| 1091 | |||
| 1092 | |||
| 1093 | |||
| 1131 | /** | 1094 | /** |
| 1132 | * update_wall_time - Uses the current clocksource to increment the wall time | 1095 | * update_wall_time - Uses the current clocksource to increment the wall time |
| 1133 | * | 1096 | * |
| @@ -1139,7 +1102,6 @@ static void update_wall_time(void) | |||
| 1139 | cycle_t offset; | 1102 | cycle_t offset; |
| 1140 | int shift = 0, maxshift; | 1103 | int shift = 0, maxshift; |
| 1141 | unsigned long flags; | 1104 | unsigned long flags; |
| 1142 | s64 remainder; | ||
| 1143 | 1105 | ||
| 1144 | write_seqlock_irqsave(&tk->lock, flags); | 1106 | write_seqlock_irqsave(&tk->lock, flags); |
| 1145 | 1107 | ||
| @@ -1181,20 +1143,11 @@ static void update_wall_time(void) | |||
| 1181 | /* correct the clock when NTP error is too big */ | 1143 | /* correct the clock when NTP error is too big */ |
| 1182 | timekeeping_adjust(tk, offset); | 1144 | timekeeping_adjust(tk, offset); |
| 1183 | 1145 | ||
| 1184 | |||
| 1185 | /* | 1146 | /* |
| 1186 | * Store only full nanoseconds into xtime_nsec after rounding | 1147 | * XXX This can be killed once everyone converts |
| 1187 | * it up and add the remainder to the error difference. | 1148 | * to the new update_vsyscall. |
| 1188 | * XXX - This is necessary to avoid small 1ns inconsistencies caused | 1149 | */ |
| 1189 | * by truncating the remainder in vsyscalls. However, it causes | 1150 | old_vsyscall_fixup(tk); |
| 1190 | * additional work to be done in timekeeping_adjust(). Once | ||
| 1191 | * the vsyscall implementations are converted to use xtime_nsec | ||
| 1192 | * (shifted nanoseconds), this can be killed. | ||
| 1193 | */ | ||
| 1194 | remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1); | ||
| 1195 | tk->xtime_nsec -= remainder; | ||
| 1196 | tk->xtime_nsec += 1ULL << tk->shift; | ||
| 1197 | tk->ntp_error += remainder << tk->ntp_error_shift; | ||
| 1198 | 1151 | ||
| 1199 | /* | 1152 | /* |
| 1200 | * Finally, make sure that after the rounding | 1153 | * Finally, make sure that after the rounding |
diff --git a/kernel/timer.c b/kernel/timer.c index d5de1b2292aa..367d00858482 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
| @@ -63,6 +63,7 @@ EXPORT_SYMBOL(jiffies_64); | |||
| 63 | #define TVR_SIZE (1 << TVR_BITS) | 63 | #define TVR_SIZE (1 << TVR_BITS) |
| 64 | #define TVN_MASK (TVN_SIZE - 1) | 64 | #define TVN_MASK (TVN_SIZE - 1) |
| 65 | #define TVR_MASK (TVR_SIZE - 1) | 65 | #define TVR_MASK (TVR_SIZE - 1) |
| 66 | #define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1)) | ||
| 66 | 67 | ||
| 67 | struct tvec { | 68 | struct tvec { |
| 68 | struct list_head vec[TVN_SIZE]; | 69 | struct list_head vec[TVN_SIZE]; |
| @@ -359,11 +360,12 @@ __internal_add_timer(struct tvec_base *base, struct timer_list *timer) | |||
| 359 | vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK); | 360 | vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK); |
| 360 | } else { | 361 | } else { |
| 361 | int i; | 362 | int i; |
| 362 | /* If the timeout is larger than 0xffffffff on 64-bit | 363 | /* If the timeout is larger than MAX_TVAL (on 64-bit |
| 363 | * architectures then we use the maximum timeout: | 364 | * architectures or with CONFIG_BASE_SMALL=1) then we |
| 365 | * use the maximum timeout. | ||
| 364 | */ | 366 | */ |
| 365 | if (idx > 0xffffffffUL) { | 367 | if (idx > MAX_TVAL) { |
| 366 | idx = 0xffffffffUL; | 368 | idx = MAX_TVAL; |
| 367 | expires = idx + base->timer_jiffies; | 369 | expires = idx + base->timer_jiffies; |
| 368 | } | 370 | } |
| 369 | i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; | 371 | i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; |
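Editor's note: MAX_TVAL makes the clamp follow the actual tvec geometry instead of the hard-coded 0xffffffff. The arithmetic below assumes the era's usual TVR_BITS/TVN_BITS defines, which are not part of this hunk, so treat the concrete values as illustrative.

/* Assumed surrounding defines:
 *      TVN_BITS = CONFIG_BASE_SMALL ? 4 : 6
 *      TVR_BITS = CONFIG_BASE_SMALL ? 6 : 8
 *
 * default geometry:   MAX_TVAL = (1ULL << (8 + 4*6)) - 1 = 0xffffffff
 * CONFIG_BASE_SMALL:  MAX_TVAL = (1ULL << (6 + 4*4)) - 1 = 0x3fffff
 *
 * so clamping a huge timeout to the old literal 0xffffffff could index past
 * the last cascade level when CONFIG_BASE_SMALL=1.
 */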
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1ec5c1dab629..31e4f55773f1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
| @@ -2061,7 +2061,8 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) | |||
| 2061 | seq_puts(m, "# -----------------\n"); | 2061 | seq_puts(m, "# -----------------\n"); |
| 2062 | seq_printf(m, "# | task: %.16s-%d " | 2062 | seq_printf(m, "# | task: %.16s-%d " |
| 2063 | "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n", | 2063 | "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n", |
| 2064 | data->comm, data->pid, data->uid, data->nice, | 2064 | data->comm, data->pid, |
| 2065 | from_kuid_munged(seq_user_ns(m), data->uid), data->nice, | ||
| 2065 | data->policy, data->rt_priority); | 2066 | data->policy, data->rt_priority); |
| 2066 | seq_puts(m, "# -----------------\n"); | 2067 | seq_puts(m, "# -----------------\n"); |
| 2067 | 2068 | ||
| @@ -4199,12 +4200,6 @@ static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, | |||
| 4199 | buf->private = 0; | 4200 | buf->private = 0; |
| 4200 | } | 4201 | } |
| 4201 | 4202 | ||
| 4202 | static int buffer_pipe_buf_steal(struct pipe_inode_info *pipe, | ||
| 4203 | struct pipe_buffer *buf) | ||
| 4204 | { | ||
| 4205 | return 1; | ||
| 4206 | } | ||
| 4207 | |||
| 4208 | static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, | 4203 | static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, |
| 4209 | struct pipe_buffer *buf) | 4204 | struct pipe_buffer *buf) |
| 4210 | { | 4205 | { |
| @@ -4220,7 +4215,7 @@ static const struct pipe_buf_operations buffer_pipe_buf_ops = { | |||
| 4220 | .unmap = generic_pipe_buf_unmap, | 4215 | .unmap = generic_pipe_buf_unmap, |
| 4221 | .confirm = generic_pipe_buf_confirm, | 4216 | .confirm = generic_pipe_buf_confirm, |
| 4222 | .release = buffer_pipe_buf_release, | 4217 | .release = buffer_pipe_buf_release, |
| 4223 | .steal = buffer_pipe_buf_steal, | 4218 | .steal = generic_pipe_buf_steal, |
| 4224 | .get = buffer_pipe_buf_get, | 4219 | .get = buffer_pipe_buf_get, |
| 4225 | }; | 4220 | }; |
| 4226 | 4221 | ||
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 63a2da0b9a6e..c15f528c1af4 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
| @@ -147,7 +147,7 @@ struct trace_array_cpu { | |||
| 147 | unsigned long skipped_entries; | 147 | unsigned long skipped_entries; |
| 148 | cycle_t preempt_timestamp; | 148 | cycle_t preempt_timestamp; |
| 149 | pid_t pid; | 149 | pid_t pid; |
| 150 | uid_t uid; | 150 | kuid_t uid; |
| 151 | char comm[TASK_COMM_LEN]; | 151 | char comm[TASK_COMM_LEN]; |
| 152 | }; | 152 | }; |
| 153 | 153 | ||
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 483162a9f908..507a7a9630bf 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c | |||
| @@ -13,7 +13,6 @@ | |||
| 13 | #include <linux/debugfs.h> | 13 | #include <linux/debugfs.h> |
| 14 | #include <linux/uaccess.h> | 14 | #include <linux/uaccess.h> |
| 15 | #include <linux/ftrace.h> | 15 | #include <linux/ftrace.h> |
| 16 | #include <linux/pstore.h> | ||
| 17 | #include <linux/fs.h> | 16 | #include <linux/fs.h> |
| 18 | 17 | ||
| 19 | #include "trace.h" | 18 | #include "trace.h" |
| @@ -76,10 +75,9 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip, | |||
| 76 | preempt_enable_notrace(); | 75 | preempt_enable_notrace(); |
| 77 | } | 76 | } |
| 78 | 77 | ||
| 79 | /* Our two options */ | 78 | /* Our option */ |
| 80 | enum { | 79 | enum { |
| 81 | TRACE_FUNC_OPT_STACK = 0x1, | 80 | TRACE_FUNC_OPT_STACK = 0x1, |
| 82 | TRACE_FUNC_OPT_PSTORE = 0x2, | ||
| 83 | }; | 81 | }; |
| 84 | 82 | ||
| 85 | static struct tracer_flags func_flags; | 83 | static struct tracer_flags func_flags; |
| @@ -109,12 +107,6 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, | |||
| 109 | disabled = atomic_inc_return(&data->disabled); | 107 | disabled = atomic_inc_return(&data->disabled); |
| 110 | 108 | ||
| 111 | if (likely(disabled == 1)) { | 109 | if (likely(disabled == 1)) { |
| 112 | /* | ||
| 113 | * So far tracing doesn't support multiple buffers, so | ||
| 114 | * we make an explicit call for now. | ||
| 115 | */ | ||
| 116 | if (unlikely(func_flags.val & TRACE_FUNC_OPT_PSTORE)) | ||
| 117 | pstore_ftrace_call(ip, parent_ip); | ||
| 118 | pc = preempt_count(); | 110 | pc = preempt_count(); |
| 119 | trace_function(tr, ip, parent_ip, flags, pc); | 111 | trace_function(tr, ip, parent_ip, flags, pc); |
| 120 | } | 112 | } |
| @@ -181,9 +173,6 @@ static struct tracer_opt func_opts[] = { | |||
| 181 | #ifdef CONFIG_STACKTRACE | 173 | #ifdef CONFIG_STACKTRACE |
| 182 | { TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) }, | 174 | { TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) }, |
| 183 | #endif | 175 | #endif |
| 184 | #ifdef CONFIG_PSTORE_FTRACE | ||
| 185 | { TRACER_OPT(func_pstore, TRACE_FUNC_OPT_PSTORE) }, | ||
| 186 | #endif | ||
| 187 | { } /* Always set a last empty entry */ | 176 | { } /* Always set a last empty entry */ |
| 188 | }; | 177 | }; |
| 189 | 178 | ||
| @@ -236,8 +225,6 @@ static int func_set_flag(u32 old_flags, u32 bit, int set) | |||
| 236 | } | 225 | } |
| 237 | 226 | ||
| 238 | break; | 227 | break; |
| 239 | case TRACE_FUNC_OPT_PSTORE: | ||
| 240 | break; | ||
| 241 | default: | 228 | default: |
| 242 | return -EINVAL; | 229 | return -EINVAL; |
| 243 | } | 230 | } |
diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 23b4d784ebdd..625df0b44690 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c | |||
| @@ -26,7 +26,9 @@ | |||
| 26 | /* | 26 | /* |
| 27 | * fill in basic accounting fields | 27 | * fill in basic accounting fields |
| 28 | */ | 28 | */ |
| 29 | void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) | 29 | void bacct_add_tsk(struct user_namespace *user_ns, |
| 30 | struct pid_namespace *pid_ns, | ||
| 31 | struct taskstats *stats, struct task_struct *tsk) | ||
| 30 | { | 32 | { |
| 31 | const struct cred *tcred; | 33 | const struct cred *tcred; |
| 32 | struct timespec uptime, ts; | 34 | struct timespec uptime, ts; |
| @@ -55,13 +57,13 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) | |||
| 55 | stats->ac_flag |= AXSIG; | 57 | stats->ac_flag |= AXSIG; |
| 56 | stats->ac_nice = task_nice(tsk); | 58 | stats->ac_nice = task_nice(tsk); |
| 57 | stats->ac_sched = tsk->policy; | 59 | stats->ac_sched = tsk->policy; |
| 58 | stats->ac_pid = tsk->pid; | 60 | stats->ac_pid = task_pid_nr_ns(tsk, pid_ns); |
| 59 | rcu_read_lock(); | 61 | rcu_read_lock(); |
| 60 | tcred = __task_cred(tsk); | 62 | tcred = __task_cred(tsk); |
| 61 | stats->ac_uid = tcred->uid; | 63 | stats->ac_uid = from_kuid_munged(user_ns, tcred->uid); |
| 62 | stats->ac_gid = tcred->gid; | 64 | stats->ac_gid = from_kgid_munged(user_ns, tcred->gid); |
| 63 | stats->ac_ppid = pid_alive(tsk) ? | 65 | stats->ac_ppid = pid_alive(tsk) ? |
| 64 | rcu_dereference(tsk->real_parent)->tgid : 0; | 66 | task_tgid_nr_ns(rcu_dereference(tsk->real_parent), pid_ns) : 0; |
| 65 | rcu_read_unlock(); | 67 | rcu_read_unlock(); |
| 66 | stats->ac_utime = cputime_to_usecs(tsk->utime); | 68 | stats->ac_utime = cputime_to_usecs(tsk->utime); |
| 67 | stats->ac_stime = cputime_to_usecs(tsk->stime); | 69 | stats->ac_stime = cputime_to_usecs(tsk->stime); |
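The tsacct change above switches bacct_add_tsk() to report ids relative to the caller's namespaces. A minimal sketch of that conversion pattern, assuming only the from_kuid_munged()/from_kgid_munged() helpers shown in the diff (the wrapper function itself is hypothetical):

	/* Sketch: fill namespace-relative ids; unmapped ids fall back to the overflow id. */
	static void fill_owner_ids(struct user_namespace *user_ns,
				   const struct cred *tcred,
				   struct taskstats *stats)
	{
		stats->ac_uid = from_kuid_munged(user_ns, tcred->uid);
		stats->ac_gid = from_kgid_munged(user_ns, tcred->gid);
	}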
diff --git a/kernel/user.c b/kernel/user.c index b815fefbe76f..750acffbe9ec 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
| @@ -38,6 +38,14 @@ struct user_namespace init_user_ns = { | |||
| 38 | .count = 4294967295U, | 38 | .count = 4294967295U, |
| 39 | }, | 39 | }, |
| 40 | }, | 40 | }, |
| 41 | .projid_map = { | ||
| 42 | .nr_extents = 1, | ||
| 43 | .extent[0] = { | ||
| 44 | .first = 0, | ||
| 45 | .lower_first = 0, | ||
| 46 | .count = 4294967295U, | ||
| 47 | }, | ||
| 48 | }, | ||
| 41 | .kref = { | 49 | .kref = { |
| 42 | .refcount = ATOMIC_INIT(3), | 50 | .refcount = ATOMIC_INIT(3), |
| 43 | }, | 51 | }, |
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 86602316422d..456a6b9fba34 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include <linux/fs.h> | 19 | #include <linux/fs.h> |
| 20 | #include <linux/uaccess.h> | 20 | #include <linux/uaccess.h> |
| 21 | #include <linux/ctype.h> | 21 | #include <linux/ctype.h> |
| 22 | #include <linux/projid.h> | ||
| 22 | 23 | ||
| 23 | static struct kmem_cache *user_ns_cachep __read_mostly; | 24 | static struct kmem_cache *user_ns_cachep __read_mostly; |
| 24 | 25 | ||
| @@ -295,6 +296,75 @@ gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid) | |||
| 295 | } | 296 | } |
| 296 | EXPORT_SYMBOL(from_kgid_munged); | 297 | EXPORT_SYMBOL(from_kgid_munged); |
| 297 | 298 | ||
| 299 | /** | ||
| 300 | * make_kprojid - Map a user-namespace projid pair into a kprojid. | ||
| 301 | * @ns: User namespace that the projid is in | ||
| 302 | * @projid: Project identifier | ||
| 303 | * | ||
| 304 | * Maps a user-namespace projid pair into a kernel internal kprojid, | ||
| 305 | * and returns that kprojid. | ||
| 306 | * | ||
| 307 | * When there is no mapping defined for the user-namespace projid | ||
| 308 | * pair INVALID_PROJID is returned. Callers are expected to test | ||
| 309 | * for and handle INVALID_PROJID being returned. INVALID_PROJID | ||
| 310 | * may be tested for using projid_valid(). | ||
| 311 | */ | ||
| 312 | kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid) | ||
| 313 | { | ||
| 314 | /* Map the projid to a global kernel projid */ | ||
| 315 | return KPROJIDT_INIT(map_id_down(&ns->projid_map, projid)); | ||
| 316 | } | ||
| 317 | EXPORT_SYMBOL(make_kprojid); | ||
| 318 | |||
| 319 | /** | ||
| 320 | * from_kprojid - Create a projid from a kprojid user-namespace pair. | ||
| 321 | * @targ: The user namespace we want a projid in. | ||
| 322 | * @kprojid: The kernel internal project identifier to start with. | ||
| 323 | * | ||
| 324 | * Map @kprojid into the user-namespace specified by @targ and | ||
| 325 | * return the resulting projid. | ||
| 326 | * | ||
| 327 | * There is always a mapping into the initial user_namespace. | ||
| 328 | * | ||
| 329 | * If @kprojid has no mapping in @targ, (projid_t)-1 is returned. | ||
| 330 | */ | ||
| 331 | projid_t from_kprojid(struct user_namespace *targ, kprojid_t kprojid) | ||
| 332 | { | ||
| 333 | /* Map the projid from a global kernel projid */ | ||
| 334 | return map_id_up(&targ->projid_map, __kprojid_val(kprojid)); | ||
| 335 | } | ||
| 336 | EXPORT_SYMBOL(from_kprojid); | ||
| 337 | |||
| 338 | /** | ||
| 339 | * from_kprojid_munged - Create a projid from a kprojid user-namespace pair. | ||
| 340 | * @targ: The user namespace we want a projid in. | ||
| 341 | * @kprojid: The kernel internal projid to start with. | ||
| 342 | * | ||
| 343 | * Map @kprojid into the user-namespace specified by @targ and | ||
| 344 | * return the resulting projid. | ||
| 345 | * | ||
| 346 | * There is always a mapping into the initial user_namespace. | ||
| 347 | * | ||
| 348 | * Unlike from_kprojid, from_kprojid_munged never fails and always | ||
| 349 | * returns a valid projid. This makes from_kprojid_munged | ||
| 350 | * appropriate for use in syscalls like stat, where | ||
| 351 | * failing the system call and failing to provide a valid projid are | ||
| 352 | * not options. | ||
| 353 | * | ||
| 354 | * If @kprojid has no mapping in @targ, OVERFLOW_PROJID is returned. | ||
| 355 | */ | ||
| 356 | projid_t from_kprojid_munged(struct user_namespace *targ, kprojid_t kprojid) | ||
| 357 | { | ||
| 358 | projid_t projid; | ||
| 359 | projid = from_kprojid(targ, kprojid); | ||
| 360 | |||
| 361 | if (projid == (projid_t) -1) | ||
| 362 | projid = OVERFLOW_PROJID; | ||
| 363 | return projid; | ||
| 364 | } | ||
| 365 | EXPORT_SYMBOL(from_kprojid_munged); | ||
| 366 | |||
| 367 | |||
| 298 | static int uid_m_show(struct seq_file *seq, void *v) | 368 | static int uid_m_show(struct seq_file *seq, void *v) |
| 299 | { | 369 | { |
| 300 | struct user_namespace *ns = seq->private; | 370 | struct user_namespace *ns = seq->private; |
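The new make_kprojid()/from_kprojid() helpers mirror the existing kuid/kgid converters. A hedged round-trip sketch of how a caller (for example, quota code) might validate a userspace-supplied project id; store_projid() here is hypothetical, while make_kprojid() and projid_valid() come from the patch itself:

	/* Sketch: convert a userspace projid into a kernel kprojid and check the mapping. */
	static int store_projid(struct user_namespace *ns, projid_t id, kprojid_t *out)
	{
		kprojid_t kprojid = make_kprojid(ns, id);

		if (!projid_valid(kprojid))	/* no mapping defined in @ns */
			return -EINVAL;

		*out = kprojid;
		return 0;
	}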
| @@ -337,6 +407,27 @@ static int gid_m_show(struct seq_file *seq, void *v) | |||
| 337 | return 0; | 407 | return 0; |
| 338 | } | 408 | } |
| 339 | 409 | ||
| 410 | static int projid_m_show(struct seq_file *seq, void *v) | ||
| 411 | { | ||
| 412 | struct user_namespace *ns = seq->private; | ||
| 413 | struct uid_gid_extent *extent = v; | ||
| 414 | struct user_namespace *lower_ns; | ||
| 415 | projid_t lower; | ||
| 416 | |||
| 417 | lower_ns = seq_user_ns(seq); | ||
| 418 | if ((lower_ns == ns) && lower_ns->parent) | ||
| 419 | lower_ns = lower_ns->parent; | ||
| 420 | |||
| 421 | lower = from_kprojid(lower_ns, KPROJIDT_INIT(extent->lower_first)); | ||
| 422 | |||
| 423 | seq_printf(seq, "%10u %10u %10u\n", | ||
| 424 | extent->first, | ||
| 425 | lower, | ||
| 426 | extent->count); | ||
| 427 | |||
| 428 | return 0; | ||
| 429 | } | ||
| 430 | |||
| 340 | static void *m_start(struct seq_file *seq, loff_t *ppos, struct uid_gid_map *map) | 431 | static void *m_start(struct seq_file *seq, loff_t *ppos, struct uid_gid_map *map) |
| 341 | { | 432 | { |
| 342 | struct uid_gid_extent *extent = NULL; | 433 | struct uid_gid_extent *extent = NULL; |
| @@ -362,6 +453,13 @@ static void *gid_m_start(struct seq_file *seq, loff_t *ppos) | |||
| 362 | return m_start(seq, ppos, &ns->gid_map); | 453 | return m_start(seq, ppos, &ns->gid_map); |
| 363 | } | 454 | } |
| 364 | 455 | ||
| 456 | static void *projid_m_start(struct seq_file *seq, loff_t *ppos) | ||
| 457 | { | ||
| 458 | struct user_namespace *ns = seq->private; | ||
| 459 | |||
| 460 | return m_start(seq, ppos, &ns->projid_map); | ||
| 461 | } | ||
| 462 | |||
| 365 | static void *m_next(struct seq_file *seq, void *v, loff_t *pos) | 463 | static void *m_next(struct seq_file *seq, void *v, loff_t *pos) |
| 366 | { | 464 | { |
| 367 | (*pos)++; | 465 | (*pos)++; |
| @@ -387,6 +485,13 @@ struct seq_operations proc_gid_seq_operations = { | |||
| 387 | .show = gid_m_show, | 485 | .show = gid_m_show, |
| 388 | }; | 486 | }; |
| 389 | 487 | ||
| 488 | struct seq_operations proc_projid_seq_operations = { | ||
| 489 | .start = projid_m_start, | ||
| 490 | .stop = m_stop, | ||
| 491 | .next = m_next, | ||
| 492 | .show = projid_m_show, | ||
| 493 | }; | ||
| 494 | |||
| 390 | static DEFINE_MUTEX(id_map_mutex); | 495 | static DEFINE_MUTEX(id_map_mutex); |
| 391 | 496 | ||
| 392 | static ssize_t map_write(struct file *file, const char __user *buf, | 497 | static ssize_t map_write(struct file *file, const char __user *buf, |
| @@ -434,7 +539,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, | |||
| 434 | /* Require the appropriate privilege CAP_SETUID or CAP_SETGID | 539 | /* Require the appropriate privilege CAP_SETUID or CAP_SETGID |
| 435 | * over the user namespace in order to set the id mapping. | 540 | * over the user namespace in order to set the id mapping. |
| 436 | */ | 541 | */ |
| 437 | if (!ns_capable(ns, cap_setid)) | 542 | if (cap_valid(cap_setid) && !ns_capable(ns, cap_setid)) |
| 438 | goto out; | 543 | goto out; |
| 439 | 544 | ||
| 440 | /* Get a buffer */ | 545 | /* Get a buffer */ |
| @@ -584,9 +689,30 @@ ssize_t proc_gid_map_write(struct file *file, const char __user *buf, size_t siz | |||
| 584 | &ns->gid_map, &ns->parent->gid_map); | 689 | &ns->gid_map, &ns->parent->gid_map); |
| 585 | } | 690 | } |
| 586 | 691 | ||
| 692 | ssize_t proc_projid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos) | ||
| 693 | { | ||
| 694 | struct seq_file *seq = file->private_data; | ||
| 695 | struct user_namespace *ns = seq->private; | ||
| 696 | struct user_namespace *seq_ns = seq_user_ns(seq); | ||
| 697 | |||
| 698 | if (!ns->parent) | ||
| 699 | return -EPERM; | ||
| 700 | |||
| 701 | if ((seq_ns != ns) && (seq_ns != ns->parent)) | ||
| 702 | return -EPERM; | ||
| 703 | |||
| 704 | /* Anyone can set any valid project id; no capability needed */ | ||
| 705 | return map_write(file, buf, size, ppos, -1, | ||
| 706 | &ns->projid_map, &ns->parent->projid_map); | ||
| 707 | } | ||
| 708 | |||
| 587 | static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, | 709 | static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, |
| 588 | struct uid_gid_map *new_map) | 710 | struct uid_gid_map *new_map) |
| 589 | { | 711 | { |
| 712 | /* Allow anyone to set a mapping that doesn't require privilege */ | ||
| 713 | if (!cap_valid(cap_setid)) | ||
| 714 | return true; | ||
| 715 | |||
| 590 | /* Allow the specified ids if we have the appropriate capability | 716 | /* Allow the specified ids if we have the appropriate capability |
| 591 | * (CAP_SETUID or CAP_SETGID) over the parent user namespace. | 717 | * (CAP_SETUID or CAP_SETGID) over the parent user namespace. |
| 592 | */ | 718 | */ |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 3c5a79e2134c..d951daa0ca9a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -58,7 +58,7 @@ enum { | |||
| 58 | * be executing on any CPU. The gcwq behaves as an unbound one. | 58 | * be executing on any CPU. The gcwq behaves as an unbound one. |
| 59 | * | 59 | * |
| 60 | * Note that DISASSOCIATED can be flipped only while holding | 60 | * Note that DISASSOCIATED can be flipped only while holding |
| 61 | * managership of all pools on the gcwq to avoid changing binding | 61 | * assoc_mutex of all pools on the gcwq to avoid changing binding |
| 62 | * state while create_worker() is in progress. | 62 | * state while create_worker() is in progress. |
| 63 | */ | 63 | */ |
| 64 | GCWQ_DISASSOCIATED = 1 << 0, /* cpu can't serve workers */ | 64 | GCWQ_DISASSOCIATED = 1 << 0, /* cpu can't serve workers */ |
| @@ -73,11 +73,10 @@ enum { | |||
| 73 | WORKER_DIE = 1 << 1, /* die die die */ | 73 | WORKER_DIE = 1 << 1, /* die die die */ |
| 74 | WORKER_IDLE = 1 << 2, /* is idle */ | 74 | WORKER_IDLE = 1 << 2, /* is idle */ |
| 75 | WORKER_PREP = 1 << 3, /* preparing to run works */ | 75 | WORKER_PREP = 1 << 3, /* preparing to run works */ |
| 76 | WORKER_REBIND = 1 << 5, /* mom is home, come back */ | ||
| 77 | WORKER_CPU_INTENSIVE = 1 << 6, /* cpu intensive */ | 76 | WORKER_CPU_INTENSIVE = 1 << 6, /* cpu intensive */ |
| 78 | WORKER_UNBOUND = 1 << 7, /* worker is unbound */ | 77 | WORKER_UNBOUND = 1 << 7, /* worker is unbound */ |
| 79 | 78 | ||
| 80 | WORKER_NOT_RUNNING = WORKER_PREP | WORKER_REBIND | WORKER_UNBOUND | | 79 | WORKER_NOT_RUNNING = WORKER_PREP | WORKER_UNBOUND | |
| 81 | WORKER_CPU_INTENSIVE, | 80 | WORKER_CPU_INTENSIVE, |
| 82 | 81 | ||
| 83 | NR_WORKER_POOLS = 2, /* # worker pools per gcwq */ | 82 | NR_WORKER_POOLS = 2, /* # worker pools per gcwq */ |
| @@ -126,7 +125,6 @@ enum { | |||
| 126 | 125 | ||
| 127 | struct global_cwq; | 126 | struct global_cwq; |
| 128 | struct worker_pool; | 127 | struct worker_pool; |
| 129 | struct idle_rebind; | ||
| 130 | 128 | ||
| 131 | /* | 129 | /* |
| 132 | * The poor guys doing the actual heavy lifting. All on-duty workers | 130 | * The poor guys doing the actual heavy lifting. All on-duty workers |
| @@ -150,7 +148,6 @@ struct worker { | |||
| 150 | int id; /* I: worker id */ | 148 | int id; /* I: worker id */ |
| 151 | 149 | ||
| 152 | /* for rebinding worker to CPU */ | 150 | /* for rebinding worker to CPU */ |
| 153 | struct idle_rebind *idle_rebind; /* L: for idle worker */ | ||
| 154 | struct work_struct rebind_work; /* L: for busy worker */ | 151 | struct work_struct rebind_work; /* L: for busy worker */ |
| 155 | }; | 152 | }; |
| 156 | 153 | ||
| @@ -160,13 +157,15 @@ struct worker_pool { | |||
| 160 | 157 | ||
| 161 | struct list_head worklist; /* L: list of pending works */ | 158 | struct list_head worklist; /* L: list of pending works */ |
| 162 | int nr_workers; /* L: total number of workers */ | 159 | int nr_workers; /* L: total number of workers */ |
| 160 | |||
| 161 | /* nr_idle includes the ones off idle_list for rebinding */ | ||
| 163 | int nr_idle; /* L: currently idle ones */ | 162 | int nr_idle; /* L: currently idle ones */ |
| 164 | 163 | ||
| 165 | struct list_head idle_list; /* X: list of idle workers */ | 164 | struct list_head idle_list; /* X: list of idle workers */ |
| 166 | struct timer_list idle_timer; /* L: worker idle timeout */ | 165 | struct timer_list idle_timer; /* L: worker idle timeout */ |
| 167 | struct timer_list mayday_timer; /* L: SOS timer for workers */ | 166 | struct timer_list mayday_timer; /* L: SOS timer for workers */ |
| 168 | 167 | ||
| 169 | struct mutex manager_mutex; /* mutex manager should hold */ | 168 | struct mutex assoc_mutex; /* protect GCWQ_DISASSOCIATED */ |
| 170 | struct ida worker_ida; /* L: for worker IDs */ | 169 | struct ida worker_ida; /* L: for worker IDs */ |
| 171 | }; | 170 | }; |
| 172 | 171 | ||
| @@ -184,9 +183,8 @@ struct global_cwq { | |||
| 184 | struct hlist_head busy_hash[BUSY_WORKER_HASH_SIZE]; | 183 | struct hlist_head busy_hash[BUSY_WORKER_HASH_SIZE]; |
| 185 | /* L: hash of busy workers */ | 184 | /* L: hash of busy workers */ |
| 186 | 185 | ||
| 187 | struct worker_pool pools[2]; /* normal and highpri pools */ | 186 | struct worker_pool pools[NR_WORKER_POOLS]; |
| 188 | 187 | /* normal and highpri pools */ | |
| 189 | wait_queue_head_t rebind_hold; /* rebind hold wait */ | ||
| 190 | } ____cacheline_aligned_in_smp; | 188 | } ____cacheline_aligned_in_smp; |
| 191 | 189 | ||
| 192 | /* | 190 | /* |
| @@ -269,17 +267,15 @@ struct workqueue_struct { | |||
| 269 | }; | 267 | }; |
| 270 | 268 | ||
| 271 | struct workqueue_struct *system_wq __read_mostly; | 269 | struct workqueue_struct *system_wq __read_mostly; |
| 272 | struct workqueue_struct *system_long_wq __read_mostly; | ||
| 273 | struct workqueue_struct *system_nrt_wq __read_mostly; | ||
| 274 | struct workqueue_struct *system_unbound_wq __read_mostly; | ||
| 275 | struct workqueue_struct *system_freezable_wq __read_mostly; | ||
| 276 | struct workqueue_struct *system_nrt_freezable_wq __read_mostly; | ||
| 277 | EXPORT_SYMBOL_GPL(system_wq); | 270 | EXPORT_SYMBOL_GPL(system_wq); |
| 271 | struct workqueue_struct *system_highpri_wq __read_mostly; | ||
| 272 | EXPORT_SYMBOL_GPL(system_highpri_wq); | ||
| 273 | struct workqueue_struct *system_long_wq __read_mostly; | ||
| 278 | EXPORT_SYMBOL_GPL(system_long_wq); | 274 | EXPORT_SYMBOL_GPL(system_long_wq); |
| 279 | EXPORT_SYMBOL_GPL(system_nrt_wq); | 275 | struct workqueue_struct *system_unbound_wq __read_mostly; |
| 280 | EXPORT_SYMBOL_GPL(system_unbound_wq); | 276 | EXPORT_SYMBOL_GPL(system_unbound_wq); |
| 277 | struct workqueue_struct *system_freezable_wq __read_mostly; | ||
| 281 | EXPORT_SYMBOL_GPL(system_freezable_wq); | 278 | EXPORT_SYMBOL_GPL(system_freezable_wq); |
| 282 | EXPORT_SYMBOL_GPL(system_nrt_freezable_wq); | ||
| 283 | 279 | ||
| 284 | #define CREATE_TRACE_POINTS | 280 | #define CREATE_TRACE_POINTS |
| 285 | #include <trace/events/workqueue.h> | 281 | #include <trace/events/workqueue.h> |
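With system_nrt_wq and system_nrt_freezable_wq removed and system_highpri_wq exported, callers pick one of the remaining global workqueues directly. A minimal hedged usage sketch (the callsite is hypothetical):

	/* Sketch: queue onto the newly exported high-priority system workqueue. */
	static void kick_example(struct work_struct *work)
	{
		queue_work(system_highpri_wq, work);
	}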
| @@ -534,18 +530,24 @@ static int work_next_color(int color) | |||
| 534 | } | 530 | } |
| 535 | 531 | ||
| 536 | /* | 532 | /* |
| 537 | * A work's data points to the cwq with WORK_STRUCT_CWQ set while the | 533 | * While queued, %WORK_STRUCT_CWQ is set and non flag bits of a work's data |
| 538 | * work is on queue. Once execution starts, WORK_STRUCT_CWQ is | 534 | * contain the pointer to the queued cwq. Once execution starts, the flag |
| 539 | * cleared and the work data contains the cpu number it was last on. | 535 | * is cleared and the high bits contain OFFQ flags and CPU number. |
| 540 | * | 536 | * |
| 541 | * set_work_{cwq|cpu}() and clear_work_data() can be used to set the | 537 | * set_work_cwq(), set_work_cpu_and_clear_pending(), mark_work_canceling() |
| 542 | * cwq, cpu or clear work->data. These functions should only be | 538 | * and clear_work_data() can be used to set the cwq, cpu or clear |
| 543 | * called while the work is owned - ie. while the PENDING bit is set. | 539 | * work->data. These functions should only be called while the work is |
| 540 | * owned - ie. while the PENDING bit is set. | ||
| 544 | * | 541 | * |
| 545 | * get_work_[g]cwq() can be used to obtain the gcwq or cwq | 542 | * get_work_[g]cwq() can be used to obtain the gcwq or cwq corresponding to |
| 546 | * corresponding to a work. gcwq is available once the work has been | 543 | * a work. gcwq is available once the work has been queued anywhere after |
| 547 | * queued anywhere after initialization. cwq is available only from | 544 | * initialization until it is sync canceled. cwq is available only while |
| 548 | * queueing until execution starts. | 545 | * the work item is queued. |
| 546 | * | ||
| 547 | * %WORK_OFFQ_CANCELING is used to mark a work item which is being | ||
| 548 | * canceled. While being canceled, a work item may have its PENDING set | ||
| 549 | * but stay off timer and worklist for arbitrarily long and nobody should | ||
| 550 | * try to steal the PENDING bit. | ||
| 549 | */ | 551 | */ |
| 550 | static inline void set_work_data(struct work_struct *work, unsigned long data, | 552 | static inline void set_work_data(struct work_struct *work, unsigned long data, |
| 551 | unsigned long flags) | 553 | unsigned long flags) |
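The rewritten comment above describes the two encodings of work->data. A small sketch of how the off-queue form is read back, mirroring the updated get_work_gcwq() further down in this diff (the helper name is illustrative; the constants are the patch's own):

	/* Sketch: decode the CPU stored in an off-queue work item's data word. */
	static unsigned int work_offq_cpu(struct work_struct *work)
	{
		unsigned long data = atomic_long_read(&work->data);

		if (data & WORK_STRUCT_CWQ)		/* queued: data holds a cwq pointer */
			return WORK_CPU_NONE;
		return data >> WORK_OFFQ_CPU_SHIFT;	/* off queue: high bits hold the CPU */
	}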
| @@ -562,13 +564,22 @@ static void set_work_cwq(struct work_struct *work, | |||
| 562 | WORK_STRUCT_PENDING | WORK_STRUCT_CWQ | extra_flags); | 564 | WORK_STRUCT_PENDING | WORK_STRUCT_CWQ | extra_flags); |
| 563 | } | 565 | } |
| 564 | 566 | ||
| 565 | static void set_work_cpu(struct work_struct *work, unsigned int cpu) | 567 | static void set_work_cpu_and_clear_pending(struct work_struct *work, |
| 568 | unsigned int cpu) | ||
| 566 | { | 569 | { |
| 567 | set_work_data(work, cpu << WORK_STRUCT_FLAG_BITS, WORK_STRUCT_PENDING); | 570 | /* |
| 571 | * The following wmb is paired with the implied mb in | ||
| 572 | * test_and_set_bit(PENDING) and ensures all updates to @work made | ||
| 573 | * here are visible to and precede any updates by the next PENDING | ||
| 574 | * owner. | ||
| 575 | */ | ||
| 576 | smp_wmb(); | ||
| 577 | set_work_data(work, (unsigned long)cpu << WORK_OFFQ_CPU_SHIFT, 0); | ||
| 568 | } | 578 | } |
| 569 | 579 | ||
| 570 | static void clear_work_data(struct work_struct *work) | 580 | static void clear_work_data(struct work_struct *work) |
| 571 | { | 581 | { |
| 582 | smp_wmb(); /* see set_work_cpu_and_clear_pending() */ | ||
| 572 | set_work_data(work, WORK_STRUCT_NO_CPU, 0); | 583 | set_work_data(work, WORK_STRUCT_NO_CPU, 0); |
| 573 | } | 584 | } |
| 574 | 585 | ||
| @@ -591,7 +602,7 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work) | |||
| 591 | return ((struct cpu_workqueue_struct *) | 602 | return ((struct cpu_workqueue_struct *) |
| 592 | (data & WORK_STRUCT_WQ_DATA_MASK))->pool->gcwq; | 603 | (data & WORK_STRUCT_WQ_DATA_MASK))->pool->gcwq; |
| 593 | 604 | ||
| 594 | cpu = data >> WORK_STRUCT_FLAG_BITS; | 605 | cpu = data >> WORK_OFFQ_CPU_SHIFT; |
| 595 | if (cpu == WORK_CPU_NONE) | 606 | if (cpu == WORK_CPU_NONE) |
| 596 | return NULL; | 607 | return NULL; |
| 597 | 608 | ||
| @@ -599,6 +610,22 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work) | |||
| 599 | return get_gcwq(cpu); | 610 | return get_gcwq(cpu); |
| 600 | } | 611 | } |
| 601 | 612 | ||
| 613 | static void mark_work_canceling(struct work_struct *work) | ||
| 614 | { | ||
| 615 | struct global_cwq *gcwq = get_work_gcwq(work); | ||
| 616 | unsigned long cpu = gcwq ? gcwq->cpu : WORK_CPU_NONE; | ||
| 617 | |||
| 618 | set_work_data(work, (cpu << WORK_OFFQ_CPU_SHIFT) | WORK_OFFQ_CANCELING, | ||
| 619 | WORK_STRUCT_PENDING); | ||
| 620 | } | ||
| 621 | |||
| 622 | static bool work_is_canceling(struct work_struct *work) | ||
| 623 | { | ||
| 624 | unsigned long data = atomic_long_read(&work->data); | ||
| 625 | |||
| 626 | return !(data & WORK_STRUCT_CWQ) && (data & WORK_OFFQ_CANCELING); | ||
| 627 | } | ||
| 628 | |||
| 602 | /* | 629 | /* |
| 603 | * Policy functions. These define the policies on how the global worker | 630 | * Policy functions. These define the policies on how the global worker |
| 604 | * pools are managed. Unless noted otherwise, these functions assume that | 631 | * pools are managed. Unless noted otherwise, these functions assume that |
| @@ -657,6 +684,13 @@ static bool too_many_workers(struct worker_pool *pool) | |||
| 657 | int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ | 684 | int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ |
| 658 | int nr_busy = pool->nr_workers - nr_idle; | 685 | int nr_busy = pool->nr_workers - nr_idle; |
| 659 | 686 | ||
| 687 | /* | ||
| 688 | * nr_idle and idle_list may disagree if idle rebinding is in | ||
| 689 | * progress. Never return %true if idle_list is empty. | ||
| 690 | */ | ||
| 691 | if (list_empty(&pool->idle_list)) | ||
| 692 | return false; | ||
| 693 | |||
| 660 | return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy; | 694 | return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy; |
| 661 | } | 695 | } |
| 662 | 696 | ||
| @@ -903,6 +937,206 @@ static struct worker *find_worker_executing_work(struct global_cwq *gcwq, | |||
| 903 | } | 937 | } |
| 904 | 938 | ||
| 905 | /** | 939 | /** |
| 940 | * move_linked_works - move linked works to a list | ||
| 941 | * @work: start of series of works to be scheduled | ||
| 942 | * @head: target list to append @work to | ||
| 943 | * @nextp: out parameter for nested worklist walking | ||
| 944 | * | ||
| 945 | * Schedule linked works starting from @work to @head. Work series to | ||
| 946 | * be scheduled starts at @work and includes any consecutive work with | ||
| 947 | * WORK_STRUCT_LINKED set in its predecessor. | ||
| 948 | * | ||
| 949 | * If @nextp is not NULL, it's updated to point to the next work of | ||
| 950 | * the last scheduled work. This allows move_linked_works() to be | ||
| 951 | * nested inside outer list_for_each_entry_safe(). | ||
| 952 | * | ||
| 953 | * CONTEXT: | ||
| 954 | * spin_lock_irq(gcwq->lock). | ||
| 955 | */ | ||
| 956 | static void move_linked_works(struct work_struct *work, struct list_head *head, | ||
| 957 | struct work_struct **nextp) | ||
| 958 | { | ||
| 959 | struct work_struct *n; | ||
| 960 | |||
| 961 | /* | ||
| 962 | * Linked worklist will always end before the end of the list, | ||
| 963 | * use NULL for list head. | ||
| 964 | */ | ||
| 965 | list_for_each_entry_safe_from(work, n, NULL, entry) { | ||
| 966 | list_move_tail(&work->entry, head); | ||
| 967 | if (!(*work_data_bits(work) & WORK_STRUCT_LINKED)) | ||
| 968 | break; | ||
| 969 | } | ||
| 970 | |||
| 971 | /* | ||
| 972 | * If we're already inside safe list traversal and have moved | ||
| 973 | * multiple works to the scheduled queue, the next position | ||
| 974 | * needs to be updated. | ||
| 975 | */ | ||
| 976 | if (nextp) | ||
| 977 | *nextp = n; | ||
| 978 | } | ||
| 979 | |||
| 980 | static void cwq_activate_delayed_work(struct work_struct *work) | ||
| 981 | { | ||
| 982 | struct cpu_workqueue_struct *cwq = get_work_cwq(work); | ||
| 983 | |||
| 984 | trace_workqueue_activate_work(work); | ||
| 985 | move_linked_works(work, &cwq->pool->worklist, NULL); | ||
| 986 | __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work)); | ||
| 987 | cwq->nr_active++; | ||
| 988 | } | ||
| 989 | |||
| 990 | static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) | ||
| 991 | { | ||
| 992 | struct work_struct *work = list_first_entry(&cwq->delayed_works, | ||
| 993 | struct work_struct, entry); | ||
| 994 | |||
| 995 | cwq_activate_delayed_work(work); | ||
| 996 | } | ||
| 997 | |||
| 998 | /** | ||
| 999 | * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight | ||
| 1000 | * @cwq: cwq of interest | ||
| 1001 | * @color: color of work which left the queue | ||
| 1002 | * | ||
| 1003 | * A work either has completed or is removed from pending queue, | ||
| 1004 | * decrement nr_in_flight of its cwq and handle workqueue flushing. | ||
| 1005 | * | ||
| 1006 | * CONTEXT: | ||
| 1007 | * spin_lock_irq(gcwq->lock). | ||
| 1008 | */ | ||
| 1009 | static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color) | ||
| 1010 | { | ||
| 1011 | /* ignore uncolored works */ | ||
| 1012 | if (color == WORK_NO_COLOR) | ||
| 1013 | return; | ||
| 1014 | |||
| 1015 | cwq->nr_in_flight[color]--; | ||
| 1016 | |||
| 1017 | cwq->nr_active--; | ||
| 1018 | if (!list_empty(&cwq->delayed_works)) { | ||
| 1019 | /* one down, submit a delayed one */ | ||
| 1020 | if (cwq->nr_active < cwq->max_active) | ||
| 1021 | cwq_activate_first_delayed(cwq); | ||
| 1022 | } | ||
| 1023 | |||
| 1024 | /* is flush in progress and are we at the flushing tip? */ | ||
| 1025 | if (likely(cwq->flush_color != color)) | ||
| 1026 | return; | ||
| 1027 | |||
| 1028 | /* are there still in-flight works? */ | ||
| 1029 | if (cwq->nr_in_flight[color]) | ||
| 1030 | return; | ||
| 1031 | |||
| 1032 | /* this cwq is done, clear flush_color */ | ||
| 1033 | cwq->flush_color = -1; | ||
| 1034 | |||
| 1035 | /* | ||
| 1036 | * If this was the last cwq, wake up the first flusher. It | ||
| 1037 | * will handle the rest. | ||
| 1038 | */ | ||
| 1039 | if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush)) | ||
| 1040 | complete(&cwq->wq->first_flusher->done); | ||
| 1041 | } | ||
| 1042 | |||
| 1043 | /** | ||
| 1044 | * try_to_grab_pending - steal work item from worklist and disable irq | ||
| 1045 | * @work: work item to steal | ||
| 1046 | * @is_dwork: @work is a delayed_work | ||
| 1047 | * @flags: place to store irq state | ||
| 1048 | * | ||
| 1049 | * Try to grab PENDING bit of @work. This function can handle @work in any | ||
| 1050 | * stable state - idle, on timer or on worklist. Return values are | ||
| 1051 | * | ||
| 1052 | * 1 if @work was pending and we successfully stole PENDING | ||
| 1053 | * 0 if @work was idle and we claimed PENDING | ||
| 1054 | * -EAGAIN if PENDING couldn't be grabbed at the moment, safe to busy-retry | ||
| 1055 | * -ENOENT if someone else is canceling @work, this state may persist | ||
| 1056 | * for arbitrarily long | ||
| 1057 | * | ||
| 1058 | * On >= 0 return, the caller owns @work's PENDING bit. To avoid getting | ||
| 1059 | * interrupted while holding PENDING and @work off queue, irq must be | ||
| 1060 | * disabled on entry. This, combined with delayed_work->timer being | ||
| 1061 | * irqsafe, ensures that we return -EAGAIN for a finite short period of time. | ||
| 1062 | * | ||
| 1063 | * On successful return, >= 0, irq is disabled and the caller is | ||
| 1064 | * responsible for releasing it using local_irq_restore(*@flags). | ||
| 1065 | * | ||
| 1066 | * This function is safe to call from any context including IRQ handler. | ||
| 1067 | */ | ||
| 1068 | static int try_to_grab_pending(struct work_struct *work, bool is_dwork, | ||
| 1069 | unsigned long *flags) | ||
| 1070 | { | ||
| 1071 | struct global_cwq *gcwq; | ||
| 1072 | |||
| 1073 | local_irq_save(*flags); | ||
| 1074 | |||
| 1075 | /* try to steal the timer if it exists */ | ||
| 1076 | if (is_dwork) { | ||
| 1077 | struct delayed_work *dwork = to_delayed_work(work); | ||
| 1078 | |||
| 1079 | /* | ||
| 1080 | * dwork->timer is irqsafe. If del_timer() fails, it's | ||
| 1081 | * guaranteed that the timer is not queued anywhere and not | ||
| 1082 | * running on the local CPU. | ||
| 1083 | */ | ||
| 1084 | if (likely(del_timer(&dwork->timer))) | ||
| 1085 | return 1; | ||
| 1086 | } | ||
| 1087 | |||
| 1088 | /* try to claim PENDING the normal way */ | ||
| 1089 | if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) | ||
| 1090 | return 0; | ||
| 1091 | |||
| 1092 | /* | ||
| 1093 | * The queueing is in progress, or it is already queued. Try to | ||
| 1094 | * steal it from ->worklist without clearing WORK_STRUCT_PENDING. | ||
| 1095 | */ | ||
| 1096 | gcwq = get_work_gcwq(work); | ||
| 1097 | if (!gcwq) | ||
| 1098 | goto fail; | ||
| 1099 | |||
| 1100 | spin_lock(&gcwq->lock); | ||
| 1101 | if (!list_empty(&work->entry)) { | ||
| 1102 | /* | ||
| 1103 | * This work is queued, but perhaps we locked the wrong gcwq. | ||
| 1104 | * In that case we must see the new value after rmb(), see | ||
| 1105 | * insert_work()->wmb(). | ||
| 1106 | */ | ||
| 1107 | smp_rmb(); | ||
| 1108 | if (gcwq == get_work_gcwq(work)) { | ||
| 1109 | debug_work_deactivate(work); | ||
| 1110 | |||
| 1111 | /* | ||
| 1112 | * A delayed work item cannot be grabbed directly | ||
| 1113 | * because it might have linked NO_COLOR work items | ||
| 1114 | * which, if left on the delayed_list, will confuse | ||
| 1115 | * cwq->nr_active management later on and cause | ||
| 1116 | * stall. Make sure the work item is activated | ||
| 1117 | * before grabbing. | ||
| 1118 | */ | ||
| 1119 | if (*work_data_bits(work) & WORK_STRUCT_DELAYED) | ||
| 1120 | cwq_activate_delayed_work(work); | ||
| 1121 | |||
| 1122 | list_del_init(&work->entry); | ||
| 1123 | cwq_dec_nr_in_flight(get_work_cwq(work), | ||
| 1124 | get_work_color(work)); | ||
| 1125 | |||
| 1126 | spin_unlock(&gcwq->lock); | ||
| 1127 | return 1; | ||
| 1128 | } | ||
| 1129 | } | ||
| 1130 | spin_unlock(&gcwq->lock); | ||
| 1131 | fail: | ||
| 1132 | local_irq_restore(*flags); | ||
| 1133 | if (work_is_canceling(work)) | ||
| 1134 | return -ENOENT; | ||
| 1135 | cpu_relax(); | ||
| 1136 | return -EAGAIN; | ||
| 1137 | } | ||
| 1138 | |||
| 1139 | /** | ||
| 906 | * insert_work - insert a work into gcwq | 1140 | * insert_work - insert a work into gcwq |
| 907 | * @cwq: cwq @work belongs to | 1141 | * @cwq: cwq @work belongs to |
| 908 | * @work: work to insert | 1142 | * @work: work to insert |
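The try_to_grab_pending() comment above spells out the caller contract, and the mod_delayed_work_on() hunk later in this diff follows it exactly. A hedged sketch of that calling pattern (the wrapper is hypothetical):

	/* Sketch: grab PENDING, busy-retrying transient failures, giving up on cancelation. */
	static bool grab_pending_or_give_up(struct work_struct *work, bool is_dwork)
	{
		unsigned long flags;
		int ret;

		do {
			ret = try_to_grab_pending(work, is_dwork, &flags);
		} while (ret == -EAGAIN);	/* transient; safe to busy-retry */

		if (ret < 0)			/* -ENOENT: someone else is canceling */
			return false;

		/* ret is 0 or 1: we own PENDING with irqs disabled */
		local_irq_restore(flags);
		return true;
	}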
| @@ -982,7 +1216,15 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, | |||
| 982 | struct cpu_workqueue_struct *cwq; | 1216 | struct cpu_workqueue_struct *cwq; |
| 983 | struct list_head *worklist; | 1217 | struct list_head *worklist; |
| 984 | unsigned int work_flags; | 1218 | unsigned int work_flags; |
| 985 | unsigned long flags; | 1219 | unsigned int req_cpu = cpu; |
| 1220 | |||
| 1221 | /* | ||
| 1222 | * While a work item is PENDING && off queue, a task trying to | ||
| 1223 | * steal the PENDING will busy-loop waiting for it to either get | ||
| 1224 | * queued or lose PENDING. Grabbing PENDING and queueing should | ||
| 1225 | * happen with IRQ disabled. | ||
| 1226 | */ | ||
| 1227 | WARN_ON_ONCE(!irqs_disabled()); | ||
| 986 | 1228 | ||
| 987 | debug_work_activate(work); | 1229 | debug_work_activate(work); |
| 988 | 1230 | ||
| @@ -995,21 +1237,22 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, | |||
| 995 | if (!(wq->flags & WQ_UNBOUND)) { | 1237 | if (!(wq->flags & WQ_UNBOUND)) { |
| 996 | struct global_cwq *last_gcwq; | 1238 | struct global_cwq *last_gcwq; |
| 997 | 1239 | ||
| 998 | if (unlikely(cpu == WORK_CPU_UNBOUND)) | 1240 | if (cpu == WORK_CPU_UNBOUND) |
| 999 | cpu = raw_smp_processor_id(); | 1241 | cpu = raw_smp_processor_id(); |
| 1000 | 1242 | ||
| 1001 | /* | 1243 | /* |
| 1002 | * It's multi cpu. If @wq is non-reentrant and @work | 1244 | * It's multi cpu. If @work was previously on a different |
| 1003 | * was previously on a different cpu, it might still | 1245 | * cpu, it might still be running there, in which case the |
| 1004 | * be running there, in which case the work needs to | 1246 | * work needs to be queued on that cpu to guarantee |
| 1005 | * be queued on that cpu to guarantee non-reentrance. | 1247 | * non-reentrancy. |
| 1006 | */ | 1248 | */ |
| 1007 | gcwq = get_gcwq(cpu); | 1249 | gcwq = get_gcwq(cpu); |
| 1008 | if (wq->flags & WQ_NON_REENTRANT && | 1250 | last_gcwq = get_work_gcwq(work); |
| 1009 | (last_gcwq = get_work_gcwq(work)) && last_gcwq != gcwq) { | 1251 | |
| 1252 | if (last_gcwq && last_gcwq != gcwq) { | ||
| 1010 | struct worker *worker; | 1253 | struct worker *worker; |
| 1011 | 1254 | ||
| 1012 | spin_lock_irqsave(&last_gcwq->lock, flags); | 1255 | spin_lock(&last_gcwq->lock); |
| 1013 | 1256 | ||
| 1014 | worker = find_worker_executing_work(last_gcwq, work); | 1257 | worker = find_worker_executing_work(last_gcwq, work); |
| 1015 | 1258 | ||
| @@ -1017,22 +1260,23 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, | |||
| 1017 | gcwq = last_gcwq; | 1260 | gcwq = last_gcwq; |
| 1018 | else { | 1261 | else { |
| 1019 | /* meh... not running there, queue here */ | 1262 | /* meh... not running there, queue here */ |
| 1020 | spin_unlock_irqrestore(&last_gcwq->lock, flags); | 1263 | spin_unlock(&last_gcwq->lock); |
| 1021 | spin_lock_irqsave(&gcwq->lock, flags); | 1264 | spin_lock(&gcwq->lock); |
| 1022 | } | 1265 | } |
| 1023 | } else | 1266 | } else { |
| 1024 | spin_lock_irqsave(&gcwq->lock, flags); | 1267 | spin_lock(&gcwq->lock); |
| 1268 | } | ||
| 1025 | } else { | 1269 | } else { |
| 1026 | gcwq = get_gcwq(WORK_CPU_UNBOUND); | 1270 | gcwq = get_gcwq(WORK_CPU_UNBOUND); |
| 1027 | spin_lock_irqsave(&gcwq->lock, flags); | 1271 | spin_lock(&gcwq->lock); |
| 1028 | } | 1272 | } |
| 1029 | 1273 | ||
| 1030 | /* gcwq determined, get cwq and queue */ | 1274 | /* gcwq determined, get cwq and queue */ |
| 1031 | cwq = get_cwq(gcwq->cpu, wq); | 1275 | cwq = get_cwq(gcwq->cpu, wq); |
| 1032 | trace_workqueue_queue_work(cpu, cwq, work); | 1276 | trace_workqueue_queue_work(req_cpu, cwq, work); |
| 1033 | 1277 | ||
| 1034 | if (WARN_ON(!list_empty(&work->entry))) { | 1278 | if (WARN_ON(!list_empty(&work->entry))) { |
| 1035 | spin_unlock_irqrestore(&gcwq->lock, flags); | 1279 | spin_unlock(&gcwq->lock); |
| 1036 | return; | 1280 | return; |
| 1037 | } | 1281 | } |
| 1038 | 1282 | ||
| @@ -1050,79 +1294,110 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, | |||
| 1050 | 1294 | ||
| 1051 | insert_work(cwq, work, worklist, work_flags); | 1295 | insert_work(cwq, work, worklist, work_flags); |
| 1052 | 1296 | ||
| 1053 | spin_unlock_irqrestore(&gcwq->lock, flags); | 1297 | spin_unlock(&gcwq->lock); |
| 1054 | } | 1298 | } |
| 1055 | 1299 | ||
| 1056 | /** | 1300 | /** |
| 1057 | * queue_work - queue work on a workqueue | 1301 | * queue_work_on - queue work on specific cpu |
| 1302 | * @cpu: CPU number to execute work on | ||
| 1058 | * @wq: workqueue to use | 1303 | * @wq: workqueue to use |
| 1059 | * @work: work to queue | 1304 | * @work: work to queue |
| 1060 | * | 1305 | * |
| 1061 | * Returns 0 if @work was already on a queue, non-zero otherwise. | 1306 | * Returns %false if @work was already on a queue, %true otherwise. |
| 1062 | * | 1307 | * |
| 1063 | * We queue the work to the CPU on which it was submitted, but if the CPU dies | 1308 | * We queue the work to a specific CPU, the caller must ensure it |
| 1064 | * it can be processed by another CPU. | 1309 | * can't go away. |
| 1065 | */ | 1310 | */ |
| 1066 | int queue_work(struct workqueue_struct *wq, struct work_struct *work) | 1311 | bool queue_work_on(int cpu, struct workqueue_struct *wq, |
| 1312 | struct work_struct *work) | ||
| 1067 | { | 1313 | { |
| 1068 | int ret; | 1314 | bool ret = false; |
| 1315 | unsigned long flags; | ||
| 1069 | 1316 | ||
| 1070 | ret = queue_work_on(get_cpu(), wq, work); | 1317 | local_irq_save(flags); |
| 1071 | put_cpu(); | 1318 | |
| 1319 | if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { | ||
| 1320 | __queue_work(cpu, wq, work); | ||
| 1321 | ret = true; | ||
| 1322 | } | ||
| 1072 | 1323 | ||
| 1324 | local_irq_restore(flags); | ||
| 1073 | return ret; | 1325 | return ret; |
| 1074 | } | 1326 | } |
| 1075 | EXPORT_SYMBOL_GPL(queue_work); | 1327 | EXPORT_SYMBOL_GPL(queue_work_on); |
| 1076 | 1328 | ||
| 1077 | /** | 1329 | /** |
| 1078 | * queue_work_on - queue work on specific cpu | 1330 | * queue_work - queue work on a workqueue |
| 1079 | * @cpu: CPU number to execute work on | ||
| 1080 | * @wq: workqueue to use | 1331 | * @wq: workqueue to use |
| 1081 | * @work: work to queue | 1332 | * @work: work to queue |
| 1082 | * | 1333 | * |
| 1083 | * Returns 0 if @work was already on a queue, non-zero otherwise. | 1334 | * Returns %false if @work was already on a queue, %true otherwise. |
| 1084 | * | 1335 | * |
| 1085 | * We queue the work to a specific CPU, the caller must ensure it | 1336 | * We queue the work to the CPU on which it was submitted, but if the CPU dies |
| 1086 | * can't go away. | 1337 | * it can be processed by another CPU. |
| 1087 | */ | 1338 | */ |
| 1088 | int | 1339 | bool queue_work(struct workqueue_struct *wq, struct work_struct *work) |
| 1089 | queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work) | ||
| 1090 | { | 1340 | { |
| 1091 | int ret = 0; | 1341 | return queue_work_on(WORK_CPU_UNBOUND, wq, work); |
| 1092 | |||
| 1093 | if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { | ||
| 1094 | __queue_work(cpu, wq, work); | ||
| 1095 | ret = 1; | ||
| 1096 | } | ||
| 1097 | return ret; | ||
| 1098 | } | 1342 | } |
| 1099 | EXPORT_SYMBOL_GPL(queue_work_on); | 1343 | EXPORT_SYMBOL_GPL(queue_work); |
| 1100 | 1344 | ||
| 1101 | static void delayed_work_timer_fn(unsigned long __data) | 1345 | void delayed_work_timer_fn(unsigned long __data) |
| 1102 | { | 1346 | { |
| 1103 | struct delayed_work *dwork = (struct delayed_work *)__data; | 1347 | struct delayed_work *dwork = (struct delayed_work *)__data; |
| 1104 | struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work); | 1348 | struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work); |
| 1105 | 1349 | ||
| 1106 | __queue_work(smp_processor_id(), cwq->wq, &dwork->work); | 1350 | /* should have been called from irqsafe timer with irq already off */ |
| 1351 | __queue_work(dwork->cpu, cwq->wq, &dwork->work); | ||
| 1107 | } | 1352 | } |
| 1353 | EXPORT_SYMBOL_GPL(delayed_work_timer_fn); | ||
| 1108 | 1354 | ||
| 1109 | /** | 1355 | static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, |
| 1110 | * queue_delayed_work - queue work on a workqueue after delay | 1356 | struct delayed_work *dwork, unsigned long delay) |
| 1111 | * @wq: workqueue to use | ||
| 1112 | * @dwork: delayable work to queue | ||
| 1113 | * @delay: number of jiffies to wait before queueing | ||
| 1114 | * | ||
| 1115 | * Returns 0 if @work was already on a queue, non-zero otherwise. | ||
| 1116 | */ | ||
| 1117 | int queue_delayed_work(struct workqueue_struct *wq, | ||
| 1118 | struct delayed_work *dwork, unsigned long delay) | ||
| 1119 | { | 1357 | { |
| 1120 | if (delay == 0) | 1358 | struct timer_list *timer = &dwork->timer; |
| 1121 | return queue_work(wq, &dwork->work); | 1359 | struct work_struct *work = &dwork->work; |
| 1360 | unsigned int lcpu; | ||
| 1361 | |||
| 1362 | WARN_ON_ONCE(timer->function != delayed_work_timer_fn || | ||
| 1363 | timer->data != (unsigned long)dwork); | ||
| 1364 | BUG_ON(timer_pending(timer)); | ||
| 1365 | BUG_ON(!list_empty(&work->entry)); | ||
| 1366 | |||
| 1367 | timer_stats_timer_set_start_info(&dwork->timer); | ||
| 1368 | |||
| 1369 | /* | ||
| 1370 | * This stores cwq for the moment, for the timer_fn. Note that the | ||
| 1371 | * work's gcwq is preserved to allow reentrance detection for | ||
| 1372 | * delayed works. | ||
| 1373 | */ | ||
| 1374 | if (!(wq->flags & WQ_UNBOUND)) { | ||
| 1375 | struct global_cwq *gcwq = get_work_gcwq(work); | ||
| 1122 | 1376 | ||
| 1123 | return queue_delayed_work_on(-1, wq, dwork, delay); | 1377 | /* |
| 1378 | * If we cannot get the last gcwq from @work directly, | ||
| 1379 | * select the last CPU such that it avoids unnecessarily | ||
| 1380 | * triggering non-reentrancy check in __queue_work(). | ||
| 1381 | */ | ||
| 1382 | lcpu = cpu; | ||
| 1383 | if (gcwq) | ||
| 1384 | lcpu = gcwq->cpu; | ||
| 1385 | if (lcpu == WORK_CPU_UNBOUND) | ||
| 1386 | lcpu = raw_smp_processor_id(); | ||
| 1387 | } else { | ||
| 1388 | lcpu = WORK_CPU_UNBOUND; | ||
| 1389 | } | ||
| 1390 | |||
| 1391 | set_work_cwq(work, get_cwq(lcpu, wq), 0); | ||
| 1392 | |||
| 1393 | dwork->cpu = cpu; | ||
| 1394 | timer->expires = jiffies + delay; | ||
| 1395 | |||
| 1396 | if (unlikely(cpu != WORK_CPU_UNBOUND)) | ||
| 1397 | add_timer_on(timer, cpu); | ||
| 1398 | else | ||
| 1399 | add_timer(timer); | ||
| 1124 | } | 1400 | } |
| 1125 | EXPORT_SYMBOL_GPL(queue_delayed_work); | ||
| 1126 | 1401 | ||
| 1127 | /** | 1402 | /** |
| 1128 | * queue_delayed_work_on - queue work on specific CPU after delay | 1403 | * queue_delayed_work_on - queue work on specific CPU after delay |
| @@ -1131,53 +1406,100 @@ EXPORT_SYMBOL_GPL(queue_delayed_work); | |||
| 1131 | * @dwork: work to queue | 1406 | * @dwork: work to queue |
| 1132 | * @delay: number of jiffies to wait before queueing | 1407 | * @delay: number of jiffies to wait before queueing |
| 1133 | * | 1408 | * |
| 1134 | * Returns 0 if @work was already on a queue, non-zero otherwise. | 1409 | * Returns %false if @work was already on a queue, %true otherwise. If |
| 1410 | * @delay is zero and @dwork is idle, it will be scheduled for immediate | ||
| 1411 | * execution. | ||
| 1135 | */ | 1412 | */ |
| 1136 | int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, | 1413 | bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, |
| 1137 | struct delayed_work *dwork, unsigned long delay) | 1414 | struct delayed_work *dwork, unsigned long delay) |
| 1138 | { | 1415 | { |
| 1139 | int ret = 0; | ||
| 1140 | struct timer_list *timer = &dwork->timer; | ||
| 1141 | struct work_struct *work = &dwork->work; | 1416 | struct work_struct *work = &dwork->work; |
| 1417 | bool ret = false; | ||
| 1418 | unsigned long flags; | ||
| 1142 | 1419 | ||
| 1143 | if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { | 1420 | if (!delay) |
| 1144 | unsigned int lcpu; | 1421 | return queue_work_on(cpu, wq, &dwork->work); |
| 1145 | 1422 | ||
| 1146 | BUG_ON(timer_pending(timer)); | 1423 | /* read the comment in __queue_work() */ |
| 1147 | BUG_ON(!list_empty(&work->entry)); | 1424 | local_irq_save(flags); |
| 1148 | 1425 | ||
| 1149 | timer_stats_timer_set_start_info(&dwork->timer); | 1426 | if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { |
| 1427 | __queue_delayed_work(cpu, wq, dwork, delay); | ||
| 1428 | ret = true; | ||
| 1429 | } | ||
| 1150 | 1430 | ||
| 1151 | /* | 1431 | local_irq_restore(flags); |
| 1152 | * This stores cwq for the moment, for the timer_fn. | 1432 | return ret; |
| 1153 | * Note that the work's gcwq is preserved to allow | 1433 | } |
| 1154 | * reentrance detection for delayed works. | 1434 | EXPORT_SYMBOL_GPL(queue_delayed_work_on); |
| 1155 | */ | ||
| 1156 | if (!(wq->flags & WQ_UNBOUND)) { | ||
| 1157 | struct global_cwq *gcwq = get_work_gcwq(work); | ||
| 1158 | 1435 | ||
| 1159 | if (gcwq && gcwq->cpu != WORK_CPU_UNBOUND) | 1436 | /** |
| 1160 | lcpu = gcwq->cpu; | 1437 | * queue_delayed_work - queue work on a workqueue after delay |
| 1161 | else | 1438 | * @wq: workqueue to use |
| 1162 | lcpu = raw_smp_processor_id(); | 1439 | * @dwork: delayable work to queue |
| 1163 | } else | 1440 | * @delay: number of jiffies to wait before queueing |
| 1164 | lcpu = WORK_CPU_UNBOUND; | 1441 | * |
| 1442 | * Equivalent to queue_delayed_work_on() but tries to use the local CPU. | ||
| 1443 | */ | ||
| 1444 | bool queue_delayed_work(struct workqueue_struct *wq, | ||
| 1445 | struct delayed_work *dwork, unsigned long delay) | ||
| 1446 | { | ||
| 1447 | return queue_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay); | ||
| 1448 | } | ||
| 1449 | EXPORT_SYMBOL_GPL(queue_delayed_work); | ||
| 1165 | 1450 | ||
| 1166 | set_work_cwq(work, get_cwq(lcpu, wq), 0); | 1451 | /** |
| 1452 | * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU | ||
| 1453 | * @cpu: CPU number to execute work on | ||
| 1454 | * @wq: workqueue to use | ||
| 1455 | * @dwork: work to queue | ||
| 1456 | * @delay: number of jiffies to wait before queueing | ||
| 1457 | * | ||
| 1458 | * If @dwork is idle, equivalent to queue_delayed_work_on(); otherwise, | ||
| 1459 | * modify @dwork's timer so that it expires after @delay. If @delay is | ||
| 1460 | * zero, @work is guaranteed to be scheduled immediately regardless of its | ||
| 1461 | * current state. | ||
| 1462 | * | ||
| 1463 | * Returns %false if @dwork was idle and queued, %true if @dwork was | ||
| 1464 | * pending and its timer was modified. | ||
| 1465 | * | ||
| 1466 | * This function is safe to call from any context including IRQ handler. | ||
| 1467 | * See try_to_grab_pending() for details. | ||
| 1468 | */ | ||
| 1469 | bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, | ||
| 1470 | struct delayed_work *dwork, unsigned long delay) | ||
| 1471 | { | ||
| 1472 | unsigned long flags; | ||
| 1473 | int ret; | ||
| 1167 | 1474 | ||
| 1168 | timer->expires = jiffies + delay; | 1475 | do { |
| 1169 | timer->data = (unsigned long)dwork; | 1476 | ret = try_to_grab_pending(&dwork->work, true, &flags); |
| 1170 | timer->function = delayed_work_timer_fn; | 1477 | } while (unlikely(ret == -EAGAIN)); |
| 1171 | 1478 | ||
| 1172 | if (unlikely(cpu >= 0)) | 1479 | if (likely(ret >= 0)) { |
| 1173 | add_timer_on(timer, cpu); | 1480 | __queue_delayed_work(cpu, wq, dwork, delay); |
| 1174 | else | 1481 | local_irq_restore(flags); |
| 1175 | add_timer(timer); | ||
| 1176 | ret = 1; | ||
| 1177 | } | 1482 | } |
| 1483 | |||
| 1484 | /* -ENOENT from try_to_grab_pending() becomes %true */ | ||
| 1178 | return ret; | 1485 | return ret; |
| 1179 | } | 1486 | } |
| 1180 | EXPORT_SYMBOL_GPL(queue_delayed_work_on); | 1487 | EXPORT_SYMBOL_GPL(mod_delayed_work_on); |
| 1488 | |||
| 1489 | /** | ||
| 1490 | * mod_delayed_work - modify delay of or queue a delayed work | ||
| 1491 | * @wq: workqueue to use | ||
| 1492 | * @dwork: work to queue | ||
| 1493 | * @delay: number of jiffies to wait before queueing | ||
| 1494 | * | ||
| 1495 | * mod_delayed_work_on() on local CPU. | ||
| 1496 | */ | ||
| 1497 | bool mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dwork, | ||
| 1498 | unsigned long delay) | ||
| 1499 | { | ||
| 1500 | return mod_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay); | ||
| 1501 | } | ||
| 1502 | EXPORT_SYMBOL_GPL(mod_delayed_work); | ||
| 1181 | 1503 | ||
| 1182 | /** | 1504 | /** |
| 1183 | * worker_enter_idle - enter idle state | 1505 | * worker_enter_idle - enter idle state |
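mod_delayed_work()/mod_delayed_work_on() are the new rearm APIs replacing open-coded cancel-then-queue sequences. A hedged usage sketch with hypothetical names (the work item is assumed to have been set up with INIT_DELAYED_WORK() elsewhere):

	/* Sketch: push a pending flush out to one second from now, or arm it if idle. */
	static struct delayed_work hypothetical_flush_work;

	static void kick_flush(void)
	{
		mod_delayed_work(system_wq, &hypothetical_flush_work, HZ);
	}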
| @@ -1305,37 +1627,21 @@ __acquires(&gcwq->lock) | |||
| 1305 | } | 1627 | } |
| 1306 | } | 1628 | } |
| 1307 | 1629 | ||
| 1308 | struct idle_rebind { | ||
| 1309 | int cnt; /* # workers to be rebound */ | ||
| 1310 | struct completion done; /* all workers rebound */ | ||
| 1311 | }; | ||
| 1312 | |||
| 1313 | /* | 1630 | /* |
| 1314 | * Rebind an idle @worker to its CPU. During CPU onlining, this has to | 1631 | * Rebind an idle @worker to its CPU. worker_thread() will test |
| 1315 | * happen synchronously for idle workers. worker_thread() will test | 1632 | * list_empty(@worker->entry) before leaving idle and call this function. |
| 1316 | * %WORKER_REBIND before leaving idle and call this function. | ||
| 1317 | */ | 1633 | */ |
| 1318 | static void idle_worker_rebind(struct worker *worker) | 1634 | static void idle_worker_rebind(struct worker *worker) |
| 1319 | { | 1635 | { |
| 1320 | struct global_cwq *gcwq = worker->pool->gcwq; | 1636 | struct global_cwq *gcwq = worker->pool->gcwq; |
| 1321 | 1637 | ||
| 1322 | /* CPU must be online at this point */ | 1638 | /* CPU may go down again inbetween, clear UNBOUND only on success */ |
| 1323 | WARN_ON(!worker_maybe_bind_and_lock(worker)); | 1639 | if (worker_maybe_bind_and_lock(worker)) |
| 1324 | if (!--worker->idle_rebind->cnt) | 1640 | worker_clr_flags(worker, WORKER_UNBOUND); |
| 1325 | complete(&worker->idle_rebind->done); | ||
| 1326 | spin_unlock_irq(&worker->pool->gcwq->lock); | ||
| 1327 | 1641 | ||
| 1328 | /* we did our part, wait for rebind_workers() to finish up */ | 1642 | /* rebind complete, become available again */ |
| 1329 | wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND)); | 1643 | list_add(&worker->entry, &worker->pool->idle_list); |
| 1330 | 1644 | spin_unlock_irq(&gcwq->lock); | |
| 1331 | /* | ||
| 1332 | * rebind_workers() shouldn't finish until all workers passed the | ||
| 1333 | * above WORKER_REBIND wait. Tell it when done. | ||
| 1334 | */ | ||
| 1335 | spin_lock_irq(&worker->pool->gcwq->lock); | ||
| 1336 | if (!--worker->idle_rebind->cnt) | ||
| 1337 | complete(&worker->idle_rebind->done); | ||
| 1338 | spin_unlock_irq(&worker->pool->gcwq->lock); | ||
| 1339 | } | 1645 | } |
| 1340 | 1646 | ||
| 1341 | /* | 1647 | /* |
| @@ -1349,16 +1655,8 @@ static void busy_worker_rebind_fn(struct work_struct *work) | |||
| 1349 | struct worker *worker = container_of(work, struct worker, rebind_work); | 1655 | struct worker *worker = container_of(work, struct worker, rebind_work); |
| 1350 | struct global_cwq *gcwq = worker->pool->gcwq; | 1656 | struct global_cwq *gcwq = worker->pool->gcwq; |
| 1351 | 1657 | ||
| 1352 | worker_maybe_bind_and_lock(worker); | 1658 | if (worker_maybe_bind_and_lock(worker)) |
| 1353 | 1659 | worker_clr_flags(worker, WORKER_UNBOUND); | |
| 1354 | /* | ||
| 1355 | * %WORKER_REBIND must be cleared even if the above binding failed; | ||
| 1356 | * otherwise, we may confuse the next CPU_UP cycle or oops / get | ||
| 1357 | * stuck by calling idle_worker_rebind() prematurely. If CPU went | ||
| 1358 | * down again inbetween, %WORKER_UNBOUND would be set, so clearing | ||
| 1359 | * %WORKER_REBIND is always safe. | ||
| 1360 | */ | ||
| 1361 | worker_clr_flags(worker, WORKER_REBIND); | ||
| 1362 | 1660 | ||
| 1363 | spin_unlock_irq(&gcwq->lock); | 1661 | spin_unlock_irq(&gcwq->lock); |
| 1364 | } | 1662 | } |
| @@ -1370,123 +1668,74 @@ static void busy_worker_rebind_fn(struct work_struct *work) | |||
| 1370 | * @gcwq->cpu is coming online. Rebind all workers to the CPU. Rebinding | 1668 | * @gcwq->cpu is coming online. Rebind all workers to the CPU. Rebinding |
| 1371 | * is different for idle and busy ones. | 1669 | * is different for idle and busy ones. |
| 1372 | * | 1670 | * |
| 1373 | * The idle ones should be rebound synchronously and idle rebinding should | 1671 | * Idle ones will be removed from the idle_list and woken up. They will |
| 1374 | * be complete before any worker starts executing work items with | 1672 | * add themselves back after completing rebind. This ensures that the |
| 1375 | * concurrency management enabled; otherwise, scheduler may oops trying to | 1673 | * idle_list doesn't contain any unbound workers when re-bound busy workers |
| 1376 | * wake up non-local idle worker from wq_worker_sleeping(). | 1674 | * try to perform local wake-ups for concurrency management. |
| 1377 | * | 1675 | * |
| 1378 | * This is achieved by repeatedly requesting rebinding until all idle | 1676 | * Busy workers can rebind after they finish their current work items. |
| 1379 | * workers are known to have been rebound under @gcwq->lock and holding all | 1677 | * Queueing the rebind work item at the head of the scheduled list is |
| 1380 | * idle workers from becoming busy until idle rebinding is complete. | 1678 | * enough. Note that nr_running will be properly bumped as busy workers |
| 1679 | * rebind. | ||
| 1381 | * | 1680 | * |
| 1382 | * Once idle workers are rebound, busy workers can be rebound as they | 1681 | * On return, all non-manager workers are scheduled for rebind - see |
| 1383 | * finish executing their current work items. Queueing the rebind work at | 1682 | * manage_workers() for the manager special case. Any idle worker |
| 1384 | * the head of their scheduled lists is enough. Note that nr_running will | 1683 | * including the manager will not appear on @idle_list until rebind is |
| 1385 | * be properbly bumped as busy workers rebind. | 1684 | * complete, making local wake-ups safe. |
| 1386 | * | ||
| 1387 | * On return, all workers are guaranteed to either be bound or have rebind | ||
| 1388 | * work item scheduled. | ||
| 1389 | */ | 1685 | */ |
| 1390 | static void rebind_workers(struct global_cwq *gcwq) | 1686 | static void rebind_workers(struct global_cwq *gcwq) |
| 1391 | __releases(&gcwq->lock) __acquires(&gcwq->lock) | ||
| 1392 | { | 1687 | { |
| 1393 | struct idle_rebind idle_rebind; | ||
| 1394 | struct worker_pool *pool; | 1688 | struct worker_pool *pool; |
| 1395 | struct worker *worker; | 1689 | struct worker *worker, *n; |
| 1396 | struct hlist_node *pos; | 1690 | struct hlist_node *pos; |
| 1397 | int i; | 1691 | int i; |
| 1398 | 1692 | ||
| 1399 | lockdep_assert_held(&gcwq->lock); | 1693 | lockdep_assert_held(&gcwq->lock); |
| 1400 | 1694 | ||
| 1401 | for_each_worker_pool(pool, gcwq) | 1695 | for_each_worker_pool(pool, gcwq) |
| 1402 | lockdep_assert_held(&pool->manager_mutex); | 1696 | lockdep_assert_held(&pool->assoc_mutex); |
| 1403 | 1697 | ||
| 1404 | /* | 1698 | /* dequeue and kick idle ones */ |
| 1405 | * Rebind idle workers. Interlocked both ways. We wait for | ||
| 1406 | * workers to rebind via @idle_rebind.done. Workers will wait for | ||
| 1407 | * us to finish up by watching %WORKER_REBIND. | ||
| 1408 | */ | ||
| 1409 | init_completion(&idle_rebind.done); | ||
| 1410 | retry: | ||
| 1411 | idle_rebind.cnt = 1; | ||
| 1412 | INIT_COMPLETION(idle_rebind.done); | ||
| 1413 | |||
| 1414 | /* set REBIND and kick idle ones, we'll wait for these later */ | ||
| 1415 | for_each_worker_pool(pool, gcwq) { | 1699 | for_each_worker_pool(pool, gcwq) { |
| 1416 | list_for_each_entry(worker, &pool->idle_list, entry) { | 1700 | list_for_each_entry_safe(worker, n, &pool->idle_list, entry) { |
| 1417 | unsigned long worker_flags = worker->flags; | 1701 | /* |
| 1418 | 1702 | * idle workers should be off @pool->idle_list | |
| 1419 | if (worker->flags & WORKER_REBIND) | 1703 | * until rebind is complete to avoid receiving |
| 1420 | continue; | 1704 | * premature local wake-ups. |
| 1421 | 1705 | */ | |
| 1422 | /* morph UNBOUND to REBIND atomically */ | 1706 | list_del_init(&worker->entry); |
| 1423 | worker_flags &= ~WORKER_UNBOUND; | ||
| 1424 | worker_flags |= WORKER_REBIND; | ||
| 1425 | ACCESS_ONCE(worker->flags) = worker_flags; | ||
| 1426 | |||
| 1427 | idle_rebind.cnt++; | ||
| 1428 | worker->idle_rebind = &idle_rebind; | ||
| 1429 | 1707 | ||
| 1430 | /* worker_thread() will call idle_worker_rebind() */ | 1708 | /* |
| 1709 | * worker_thread() will see the above dequeuing | ||
| 1710 | * and call idle_worker_rebind(). | ||
| 1711 | */ | ||
| 1431 | wake_up_process(worker->task); | 1712 | wake_up_process(worker->task); |
| 1432 | } | 1713 | } |
| 1433 | } | 1714 | } |
| 1434 | 1715 | ||
| 1435 | if (--idle_rebind.cnt) { | 1716 | /* rebind busy workers */ |
| 1436 | spin_unlock_irq(&gcwq->lock); | ||
| 1437 | wait_for_completion(&idle_rebind.done); | ||
| 1438 | spin_lock_irq(&gcwq->lock); | ||
| 1439 | /* busy ones might have become idle while waiting, retry */ | ||
| 1440 | goto retry; | ||
| 1441 | } | ||
| 1442 | |||
| 1443 | /* all idle workers are rebound, rebind busy workers */ | ||
| 1444 | for_each_busy_worker(worker, i, pos, gcwq) { | 1717 | for_each_busy_worker(worker, i, pos, gcwq) { |
| 1445 | struct work_struct *rebind_work = &worker->rebind_work; | 1718 | struct work_struct *rebind_work = &worker->rebind_work; |
| 1446 | unsigned long worker_flags = worker->flags; | 1719 | struct workqueue_struct *wq; |
| 1447 | |||
| 1448 | /* morph UNBOUND to REBIND atomically */ | ||
| 1449 | worker_flags &= ~WORKER_UNBOUND; | ||
| 1450 | worker_flags |= WORKER_REBIND; | ||
| 1451 | ACCESS_ONCE(worker->flags) = worker_flags; | ||
| 1452 | 1720 | ||
| 1453 | if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, | 1721 | if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, |
| 1454 | work_data_bits(rebind_work))) | 1722 | work_data_bits(rebind_work))) |
| 1455 | continue; | 1723 | continue; |
| 1456 | 1724 | ||
| 1457 | /* wq doesn't matter, use the default one */ | ||
| 1458 | debug_work_activate(rebind_work); | 1725 | debug_work_activate(rebind_work); |
| 1459 | insert_work(get_cwq(gcwq->cpu, system_wq), rebind_work, | ||
| 1460 | worker->scheduled.next, | ||
| 1461 | work_color_to_flags(WORK_NO_COLOR)); | ||
| 1462 | } | ||
| 1463 | |||
| 1464 | /* | ||
| 1465 | * All idle workers are rebound and waiting for %WORKER_REBIND to | ||
| 1466 | * be cleared inside idle_worker_rebind(). Clear and release. | ||
| 1467 | * Clearing %WORKER_REBIND from this foreign context is safe | ||
| 1468 | * because these workers are still guaranteed to be idle. | ||
| 1469 | * | ||
| 1470 | * We need to make sure all idle workers passed WORKER_REBIND wait | ||
| 1471 | * in idle_worker_rebind() before returning; otherwise, workers can | ||
| 1472 | * get stuck at the wait if hotplug cycle repeats. | ||
| 1473 | */ | ||
| 1474 | idle_rebind.cnt = 1; | ||
| 1475 | INIT_COMPLETION(idle_rebind.done); | ||
| 1476 | |||
| 1477 | for_each_worker_pool(pool, gcwq) { | ||
| 1478 | list_for_each_entry(worker, &pool->idle_list, entry) { | ||
| 1479 | worker->flags &= ~WORKER_REBIND; | ||
| 1480 | idle_rebind.cnt++; | ||
| 1481 | } | ||
| 1482 | } | ||
| 1483 | 1726 | ||
| 1484 | wake_up_all(&gcwq->rebind_hold); | 1727 | /* |
| 1728 | * wq doesn't really matter but let's keep @worker->pool | ||
| 1729 | * and @cwq->pool consistent for sanity. | ||
| 1730 | */ | ||
| 1731 | if (worker_pool_pri(worker->pool)) | ||
| 1732 | wq = system_highpri_wq; | ||
| 1733 | else | ||
| 1734 | wq = system_wq; | ||
| 1485 | 1735 | ||
| 1486 | if (--idle_rebind.cnt) { | 1736 | insert_work(get_cwq(gcwq->cpu, wq), rebind_work, |
| 1487 | spin_unlock_irq(&gcwq->lock); | 1737 | worker->scheduled.next, |
| 1488 | wait_for_completion(&idle_rebind.done); | 1738 | work_color_to_flags(WORK_NO_COLOR)); |
| 1489 | spin_lock_irq(&gcwq->lock); | ||
| 1490 | } | 1739 | } |
| 1491 | } | 1740 | } |
| 1492 | 1741 | ||
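The busy-worker path above now routes the rebind work through system_highpri_wq whenever the worker belongs to the high-priority pool, keeping @worker->pool and @cwq->pool consistent. Outside workqueue.c the same pool is reached with the WQ_HIGHPRI flag; a minimal, hypothetical driver sketch (the mydrv_* names are illustrative and not part of this patch):

	#include <linux/errno.h>
	#include <linux/workqueue.h>

	static struct workqueue_struct *mydrv_wq;

	static int mydrv_create_wq(void)
	{
		/* WQ_HIGHPRI work items are served by the high-priority worker pool */
		mydrv_wq = alloc_workqueue("mydrv_highpri", WQ_HIGHPRI, 0);
		return mydrv_wq ? 0 : -ENOMEM;
	}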
| @@ -1844,22 +2093,22 @@ static bool manage_workers(struct worker *worker) | |||
| 1844 | * grab %POOL_MANAGING_WORKERS to achieve this because that can | 2093 | * grab %POOL_MANAGING_WORKERS to achieve this because that can |
| 1845 | * lead to idle worker depletion (all become busy thinking someone | 2094 | * lead to idle worker depletion (all become busy thinking someone |
| 1846 | * else is managing) which in turn can result in deadlock under | 2095 | * else is managing) which in turn can result in deadlock under |
| 1847 | * extreme circumstances. Use @pool->manager_mutex to synchronize | 2096 | * extreme circumstances. Use @pool->assoc_mutex to synchronize |
| 1848 | * manager against CPU hotplug. | 2097 | * manager against CPU hotplug. |
| 1849 | * | 2098 | * |
| 1850 | * manager_mutex would always be free unless CPU hotplug is in | 2099 | * assoc_mutex would always be free unless CPU hotplug is in |
| 1851 | * progress. trylock first without dropping @gcwq->lock. | 2100 | * progress. trylock first without dropping @gcwq->lock. |
| 1852 | */ | 2101 | */ |
| 1853 | if (unlikely(!mutex_trylock(&pool->manager_mutex))) { | 2102 | if (unlikely(!mutex_trylock(&pool->assoc_mutex))) { |
| 1854 | spin_unlock_irq(&pool->gcwq->lock); | 2103 | spin_unlock_irq(&pool->gcwq->lock); |
| 1855 | mutex_lock(&pool->manager_mutex); | 2104 | mutex_lock(&pool->assoc_mutex); |
| 1856 | /* | 2105 | /* |
| 1857 | * CPU hotplug could have happened while we were waiting | 2106 | * CPU hotplug could have happened while we were waiting |
| 1858 | * for manager_mutex. Hotplug itself can't handle us | 2107 | * for assoc_mutex. Hotplug itself can't handle us |
| 1859 | * because the manager isn't on either the idle or busy list, and | 2108 | * because the manager isn't on either the idle or busy list, and |
| 1860 | * @gcwq's state and ours could have deviated. | 2109 | * @gcwq's state and ours could have deviated. |
| 1861 | * | 2110 | * |
| 1862 | * As hotplug is now excluded via manager_mutex, we can | 2111 | * As hotplug is now excluded via assoc_mutex, we can |
| 1863 | * simply try to bind. It will succeed or fail depending | 2112 | * simply try to bind. It will succeed or fail depending |
| 1864 | * on @gcwq's current state. Try it and adjust | 2113 | * on @gcwq's current state. Try it and adjust |
| 1865 | * %WORKER_UNBOUND accordingly. | 2114 | * %WORKER_UNBOUND accordingly. |
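The assoc_mutex acquisition above uses a trylock-first pattern so the manager never sleeps on a mutex while holding the gcwq spinlock, and revalidates state after reacquiring it. A generic sketch of that pattern, with placeholder lock names rather than the actual workqueue locks:

	#include <linux/mutex.h>
	#include <linux/spinlock.h>

	static DEFINE_MUTEX(slow_mutex);	/* placeholder for assoc_mutex */
	static DEFINE_SPINLOCK(fast_lock);	/* placeholder for gcwq->lock */

	static void claim_both_locks(void)
	{
		spin_lock_irq(&fast_lock);
		if (!mutex_trylock(&slow_mutex)) {
			/* never sleep on a mutex while a spinlock is held */
			spin_unlock_irq(&fast_lock);
			mutex_lock(&slow_mutex);
			spin_lock_irq(&fast_lock);
			/* state may have changed while both locks were dropped */
		}
		/* ... both locks held here ... */
		mutex_unlock(&slow_mutex);
		spin_unlock_irq(&fast_lock);
	}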
| @@ -1882,112 +2131,11 @@ static bool manage_workers(struct worker *worker) | |||
| 1882 | ret |= maybe_create_worker(pool); | 2131 | ret |= maybe_create_worker(pool); |
| 1883 | 2132 | ||
| 1884 | pool->flags &= ~POOL_MANAGING_WORKERS; | 2133 | pool->flags &= ~POOL_MANAGING_WORKERS; |
| 1885 | mutex_unlock(&pool->manager_mutex); | 2134 | mutex_unlock(&pool->assoc_mutex); |
| 1886 | return ret; | 2135 | return ret; |
| 1887 | } | 2136 | } |
| 1888 | 2137 | ||
| 1889 | /** | 2138 | /** |
| 1890 | * move_linked_works - move linked works to a list | ||
| 1891 | * @work: start of series of works to be scheduled | ||
| 1892 | * @head: target list to append @work to | ||
| 1893 | * @nextp: out parameter for nested worklist walking | ||
| 1894 | * | ||
| 1895 | * Schedule linked works starting from @work to @head. Work series to | ||
| 1896 | * be scheduled starts at @work and includes any consecutive work with | ||
| 1897 | * WORK_STRUCT_LINKED set in its predecessor. | ||
| 1898 | * | ||
| 1899 | * If @nextp is not NULL, it's updated to point to the next work of | ||
| 1900 | * the last scheduled work. This allows move_linked_works() to be | ||
| 1901 | * nested inside outer list_for_each_entry_safe(). | ||
| 1902 | * | ||
| 1903 | * CONTEXT: | ||
| 1904 | * spin_lock_irq(gcwq->lock). | ||
| 1905 | */ | ||
| 1906 | static void move_linked_works(struct work_struct *work, struct list_head *head, | ||
| 1907 | struct work_struct **nextp) | ||
| 1908 | { | ||
| 1909 | struct work_struct *n; | ||
| 1910 | |||
| 1911 | /* | ||
| 1912 | * Linked worklist will always end before the end of the list, | ||
| 1913 | * use NULL for list head. | ||
| 1914 | */ | ||
| 1915 | list_for_each_entry_safe_from(work, n, NULL, entry) { | ||
| 1916 | list_move_tail(&work->entry, head); | ||
| 1917 | if (!(*work_data_bits(work) & WORK_STRUCT_LINKED)) | ||
| 1918 | break; | ||
| 1919 | } | ||
| 1920 | |||
| 1921 | /* | ||
| 1922 | * If we're already inside safe list traversal and have moved | ||
| 1923 | * multiple works to the scheduled queue, the next position | ||
| 1924 | * needs to be updated. | ||
| 1925 | */ | ||
| 1926 | if (nextp) | ||
| 1927 | *nextp = n; | ||
| 1928 | } | ||
| 1929 | |||
| 1930 | static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) | ||
| 1931 | { | ||
| 1932 | struct work_struct *work = list_first_entry(&cwq->delayed_works, | ||
| 1933 | struct work_struct, entry); | ||
| 1934 | |||
| 1935 | trace_workqueue_activate_work(work); | ||
| 1936 | move_linked_works(work, &cwq->pool->worklist, NULL); | ||
| 1937 | __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work)); | ||
| 1938 | cwq->nr_active++; | ||
| 1939 | } | ||
| 1940 | |||
| 1941 | /** | ||
| 1942 | * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight | ||
| 1943 | * @cwq: cwq of interest | ||
| 1944 | * @color: color of work which left the queue | ||
| 1945 | * @delayed: for a delayed work | ||
| 1946 | * | ||
| 1947 | * A work either has completed or is removed from pending queue, | ||
| 1948 | * decrement nr_in_flight of its cwq and handle workqueue flushing. | ||
| 1949 | * | ||
| 1950 | * CONTEXT: | ||
| 1951 | * spin_lock_irq(gcwq->lock). | ||
| 1952 | */ | ||
| 1953 | static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color, | ||
| 1954 | bool delayed) | ||
| 1955 | { | ||
| 1956 | /* ignore uncolored works */ | ||
| 1957 | if (color == WORK_NO_COLOR) | ||
| 1958 | return; | ||
| 1959 | |||
| 1960 | cwq->nr_in_flight[color]--; | ||
| 1961 | |||
| 1962 | if (!delayed) { | ||
| 1963 | cwq->nr_active--; | ||
| 1964 | if (!list_empty(&cwq->delayed_works)) { | ||
| 1965 | /* one down, submit a delayed one */ | ||
| 1966 | if (cwq->nr_active < cwq->max_active) | ||
| 1967 | cwq_activate_first_delayed(cwq); | ||
| 1968 | } | ||
| 1969 | } | ||
| 1970 | |||
| 1971 | /* is flush in progress and are we at the flushing tip? */ | ||
| 1972 | if (likely(cwq->flush_color != color)) | ||
| 1973 | return; | ||
| 1974 | |||
| 1975 | /* are there still in-flight works? */ | ||
| 1976 | if (cwq->nr_in_flight[color]) | ||
| 1977 | return; | ||
| 1978 | |||
| 1979 | /* this cwq is done, clear flush_color */ | ||
| 1980 | cwq->flush_color = -1; | ||
| 1981 | |||
| 1982 | /* | ||
| 1983 | * If this was the last cwq, wake up the first flusher. It | ||
| 1984 | * will handle the rest. | ||
| 1985 | */ | ||
| 1986 | if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush)) | ||
| 1987 | complete(&cwq->wq->first_flusher->done); | ||
| 1988 | } | ||
| 1989 | |||
| 1990 | /** | ||
| 1991 | * process_one_work - process single work | 2139 | * process_one_work - process single work |
| 1992 | * @worker: self | 2140 | * @worker: self |
| 1993 | * @work: work to process | 2141 | * @work: work to process |
| @@ -2030,7 +2178,7 @@ __acquires(&gcwq->lock) | |||
| 2030 | * necessary to avoid spurious warnings from rescuers servicing the | 2178 | * necessary to avoid spurious warnings from rescuers servicing the |
| 2031 | * unbound or a disassociated gcwq. | 2179 | * unbound or a disassociated gcwq. |
| 2032 | */ | 2180 | */ |
| 2033 | WARN_ON_ONCE(!(worker->flags & (WORKER_UNBOUND | WORKER_REBIND)) && | 2181 | WARN_ON_ONCE(!(worker->flags & WORKER_UNBOUND) && |
| 2034 | !(gcwq->flags & GCWQ_DISASSOCIATED) && | 2182 | !(gcwq->flags & GCWQ_DISASSOCIATED) && |
| 2035 | raw_smp_processor_id() != gcwq->cpu); | 2183 | raw_smp_processor_id() != gcwq->cpu); |
| 2036 | 2184 | ||
| @@ -2046,15 +2194,13 @@ __acquires(&gcwq->lock) | |||
| 2046 | return; | 2194 | return; |
| 2047 | } | 2195 | } |
| 2048 | 2196 | ||
| 2049 | /* claim and process */ | 2197 | /* claim and dequeue */ |
| 2050 | debug_work_deactivate(work); | 2198 | debug_work_deactivate(work); |
| 2051 | hlist_add_head(&worker->hentry, bwh); | 2199 | hlist_add_head(&worker->hentry, bwh); |
| 2052 | worker->current_work = work; | 2200 | worker->current_work = work; |
| 2053 | worker->current_cwq = cwq; | 2201 | worker->current_cwq = cwq; |
| 2054 | work_color = get_work_color(work); | 2202 | work_color = get_work_color(work); |
| 2055 | 2203 | ||
| 2056 | /* record the current cpu number in the work data and dequeue */ | ||
| 2057 | set_work_cpu(work, gcwq->cpu); | ||
| 2058 | list_del_init(&work->entry); | 2204 | list_del_init(&work->entry); |
| 2059 | 2205 | ||
| 2060 | /* | 2206 | /* |
| @@ -2071,9 +2217,16 @@ __acquires(&gcwq->lock) | |||
| 2071 | if ((worker->flags & WORKER_UNBOUND) && need_more_worker(pool)) | 2217 | if ((worker->flags & WORKER_UNBOUND) && need_more_worker(pool)) |
| 2072 | wake_up_worker(pool); | 2218 | wake_up_worker(pool); |
| 2073 | 2219 | ||
| 2220 | /* | ||
| 2221 | * Record the last CPU and clear PENDING which should be the last | ||
| 2222 | * update to @work. Also, do this inside @gcwq->lock so that | ||
| 2223 | * PENDING and queued state changes happen together while IRQ is | ||
| 2224 | * disabled. | ||
| 2225 | */ | ||
| 2226 | set_work_cpu_and_clear_pending(work, gcwq->cpu); | ||
| 2227 | |||
| 2074 | spin_unlock_irq(&gcwq->lock); | 2228 | spin_unlock_irq(&gcwq->lock); |
| 2075 | 2229 | ||
| 2076 | work_clear_pending(work); | ||
| 2077 | lock_map_acquire_read(&cwq->wq->lockdep_map); | 2230 | lock_map_acquire_read(&cwq->wq->lockdep_map); |
| 2078 | lock_map_acquire(&lockdep_map); | 2231 | lock_map_acquire(&lockdep_map); |
| 2079 | trace_workqueue_execute_start(work); | 2232 | trace_workqueue_execute_start(work); |
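Clearing PENDING together with recording the last CPU, under gcwq->lock with IRQs disabled, keeps the pending and queued state transitions atomic with respect to interrupt handlers on the same CPU, so queueing work from hard-IRQ context remains safe. A hypothetical ISR sketch (the mydrv names are illustrative only):

	#include <linux/interrupt.h>
	#include <linux/workqueue.h>

	struct mydrv {
		struct work_struct rx_work;
		/* ... device state ... */
	};

	static irqreturn_t mydrv_irq(int irq, void *dev_id)
	{
		struct mydrv *md = dev_id;

		/* acknowledge the hardware here, then defer processing */
		queue_work(system_wq, &md->rx_work);
		return IRQ_HANDLED;
	}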
| @@ -2087,11 +2240,9 @@ __acquires(&gcwq->lock) | |||
| 2087 | lock_map_release(&cwq->wq->lockdep_map); | 2240 | lock_map_release(&cwq->wq->lockdep_map); |
| 2088 | 2241 | ||
| 2089 | if (unlikely(in_atomic() || lockdep_depth(current) > 0)) { | 2242 | if (unlikely(in_atomic() || lockdep_depth(current) > 0)) { |
| 2090 | printk(KERN_ERR "BUG: workqueue leaked lock or atomic: " | 2243 | pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n" |
| 2091 | "%s/0x%08x/%d\n", | 2244 | " last function: %pf\n", |
| 2092 | current->comm, preempt_count(), task_pid_nr(current)); | 2245 | current->comm, preempt_count(), task_pid_nr(current), f); |
| 2093 | printk(KERN_ERR " last function: "); | ||
| 2094 | print_symbol("%s\n", (unsigned long)f); | ||
| 2095 | debug_show_held_locks(current); | 2246 | debug_show_held_locks(current); |
| 2096 | dump_stack(); | 2247 | dump_stack(); |
| 2097 | } | 2248 | } |
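The printk()/print_symbol() pair is folded into a single pr_err() using the %pf pointer-format extension, which resolves a function pointer to its symbol name. The same specifier is available to any kernel code; a small illustrative fragment (the names are placeholders, not part of this patch):

	#include <linux/printk.h>

	static void report_bad_callback(void (*fn)(void), int err)
	{
		/* %pf prints the symbol name of a function pointer */
		pr_err("mydrv: callback %pf returned %d\n", fn, err);
	}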
| @@ -2106,7 +2257,7 @@ __acquires(&gcwq->lock) | |||
| 2106 | hlist_del_init(&worker->hentry); | 2257 | hlist_del_init(&worker->hentry); |
| 2107 | worker->current_work = NULL; | 2258 | worker->current_work = NULL; |
| 2108 | worker->current_cwq = NULL; | 2259 | worker->current_cwq = NULL; |
| 2109 | cwq_dec_nr_in_flight(cwq, work_color, false); | 2260 | cwq_dec_nr_in_flight(cwq, work_color); |
| 2110 | } | 2261 | } |
| 2111 | 2262 | ||
| 2112 | /** | 2263 | /** |
| @@ -2151,18 +2302,17 @@ static int worker_thread(void *__worker) | |||
| 2151 | woke_up: | 2302 | woke_up: |
| 2152 | spin_lock_irq(&gcwq->lock); | 2303 | spin_lock_irq(&gcwq->lock); |
| 2153 | 2304 | ||
| 2154 | /* | 2305 | /* we are off idle list if destruction or rebind is requested */ |
| 2155 | * DIE can be set only while idle and REBIND set while busy has | 2306 | if (unlikely(list_empty(&worker->entry))) { |
| 2156 | * @worker->rebind_work scheduled. Checking here is enough. | ||
| 2157 | */ | ||
| 2158 | if (unlikely(worker->flags & (WORKER_REBIND | WORKER_DIE))) { | ||
| 2159 | spin_unlock_irq(&gcwq->lock); | 2307 | spin_unlock_irq(&gcwq->lock); |
| 2160 | 2308 | ||
| 2309 | /* if DIE is set, destruction is requested */ | ||
| 2161 | if (worker->flags & WORKER_DIE) { | 2310 | if (worker->flags & WORKER_DIE) { |
| 2162 | worker->task->flags &= ~PF_WQ_WORKER; | 2311 | worker->task->flags &= ~PF_WQ_WORKER; |
| 2163 | return 0; | 2312 | return 0; |
| 2164 | } | 2313 | } |
| 2165 | 2314 | ||
| 2315 | /* otherwise, rebind */ | ||
| 2166 | idle_worker_rebind(worker); | 2316 | idle_worker_rebind(worker); |
| 2167 | goto woke_up; | 2317 | goto woke_up; |
| 2168 | } | 2318 | } |
| @@ -2645,8 +2795,8 @@ reflush: | |||
| 2645 | 2795 | ||
| 2646 | if (++flush_cnt == 10 || | 2796 | if (++flush_cnt == 10 || |
| 2647 | (flush_cnt % 100 == 0 && flush_cnt <= 1000)) | 2797 | (flush_cnt % 100 == 0 && flush_cnt <= 1000)) |
| 2648 | pr_warning("workqueue %s: flush on destruction isn't complete after %u tries\n", | 2798 | pr_warn("workqueue %s: flush on destruction isn't complete after %u tries\n", |
| 2649 | wq->name, flush_cnt); | 2799 | wq->name, flush_cnt); |
| 2650 | goto reflush; | 2800 | goto reflush; |
| 2651 | } | 2801 | } |
| 2652 | 2802 | ||
| @@ -2657,8 +2807,7 @@ reflush: | |||
| 2657 | } | 2807 | } |
| 2658 | EXPORT_SYMBOL_GPL(drain_workqueue); | 2808 | EXPORT_SYMBOL_GPL(drain_workqueue); |
| 2659 | 2809 | ||
| 2660 | static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr, | 2810 | static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) |
| 2661 | bool wait_executing) | ||
| 2662 | { | 2811 | { |
| 2663 | struct worker *worker = NULL; | 2812 | struct worker *worker = NULL; |
| 2664 | struct global_cwq *gcwq; | 2813 | struct global_cwq *gcwq; |
| @@ -2680,13 +2829,12 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr, | |||
| 2680 | cwq = get_work_cwq(work); | 2829 | cwq = get_work_cwq(work); |
| 2681 | if (unlikely(!cwq || gcwq != cwq->pool->gcwq)) | 2830 | if (unlikely(!cwq || gcwq != cwq->pool->gcwq)) |
| 2682 | goto already_gone; | 2831 | goto already_gone; |
| 2683 | } else if (wait_executing) { | 2832 | } else { |
| 2684 | worker = find_worker_executing_work(gcwq, work); | 2833 | worker = find_worker_executing_work(gcwq, work); |
| 2685 | if (!worker) | 2834 | if (!worker) |
| 2686 | goto already_gone; | 2835 | goto already_gone; |
| 2687 | cwq = worker->current_cwq; | 2836 | cwq = worker->current_cwq; |
| 2688 | } else | 2837 | } |
| 2689 | goto already_gone; | ||
| 2690 | 2838 | ||
| 2691 | insert_wq_barrier(cwq, barr, work, worker); | 2839 | insert_wq_barrier(cwq, barr, work, worker); |
| 2692 | spin_unlock_irq(&gcwq->lock); | 2840 | spin_unlock_irq(&gcwq->lock); |
| @@ -2713,15 +2861,8 @@ already_gone: | |||
| 2713 | * flush_work - wait for a work to finish executing the last queueing instance | 2861 | * flush_work - wait for a work to finish executing the last queueing instance |
| 2714 | * @work: the work to flush | 2862 | * @work: the work to flush |
| 2715 | * | 2863 | * |
| 2716 | * Wait until @work has finished execution. This function considers | 2864 | * Wait until @work has finished execution. @work is guaranteed to be idle |
| 2717 | * only the last queueing instance of @work. If @work has been | 2865 | * on return if it hasn't been requeued since flush started. |
| 2718 | * enqueued across different CPUs on a non-reentrant workqueue or on | ||
| 2719 | * multiple workqueues, @work might still be executing on return on | ||
| 2720 | * some of the CPUs from earlier queueing. | ||
| 2721 | * | ||
| 2722 | * If @work was queued only on a non-reentrant, ordered or unbound | ||
| 2723 | * workqueue, @work is guaranteed to be idle on return if it hasn't | ||
| 2724 | * been requeued since flush started. | ||
| 2725 | * | 2866 | * |
| 2726 | * RETURNS: | 2867 | * RETURNS: |
| 2727 | * %true if flush_work() waited for the work to finish execution, | 2868 | * %true if flush_work() waited for the work to finish execution, |
| @@ -2734,140 +2875,36 @@ bool flush_work(struct work_struct *work) | |||
| 2734 | lock_map_acquire(&work->lockdep_map); | 2875 | lock_map_acquire(&work->lockdep_map); |
| 2735 | lock_map_release(&work->lockdep_map); | 2876 | lock_map_release(&work->lockdep_map); |
| 2736 | 2877 | ||
| 2737 | if (start_flush_work(work, &barr, true)) { | 2878 | if (start_flush_work(work, &barr)) { |
| 2738 | wait_for_completion(&barr.done); | 2879 | wait_for_completion(&barr.done); |
| 2739 | destroy_work_on_stack(&barr.work); | 2880 | destroy_work_on_stack(&barr.work); |
| 2740 | return true; | 2881 | return true; |
| 2741 | } else | 2882 | } else { |
| 2742 | return false; | ||
| 2743 | } | ||
| 2744 | EXPORT_SYMBOL_GPL(flush_work); | ||
| 2745 | |||
| 2746 | static bool wait_on_cpu_work(struct global_cwq *gcwq, struct work_struct *work) | ||
| 2747 | { | ||
| 2748 | struct wq_barrier barr; | ||
| 2749 | struct worker *worker; | ||
| 2750 | |||
| 2751 | spin_lock_irq(&gcwq->lock); | ||
| 2752 | |||
| 2753 | worker = find_worker_executing_work(gcwq, work); | ||
| 2754 | if (unlikely(worker)) | ||
| 2755 | insert_wq_barrier(worker->current_cwq, &barr, work, worker); | ||
| 2756 | |||
| 2757 | spin_unlock_irq(&gcwq->lock); | ||
| 2758 | |||
| 2759 | if (unlikely(worker)) { | ||
| 2760 | wait_for_completion(&barr.done); | ||
| 2761 | destroy_work_on_stack(&barr.work); | ||
| 2762 | return true; | ||
| 2763 | } else | ||
| 2764 | return false; | 2883 | return false; |
| 2765 | } | ||
| 2766 | |||
| 2767 | static bool wait_on_work(struct work_struct *work) | ||
| 2768 | { | ||
| 2769 | bool ret = false; | ||
| 2770 | int cpu; | ||
| 2771 | |||
| 2772 | might_sleep(); | ||
| 2773 | |||
| 2774 | lock_map_acquire(&work->lockdep_map); | ||
| 2775 | lock_map_release(&work->lockdep_map); | ||
| 2776 | |||
| 2777 | for_each_gcwq_cpu(cpu) | ||
| 2778 | ret |= wait_on_cpu_work(get_gcwq(cpu), work); | ||
| 2779 | return ret; | ||
| 2780 | } | ||
| 2781 | |||
| 2782 | /** | ||
| 2783 | * flush_work_sync - wait until a work has finished execution | ||
| 2784 | * @work: the work to flush | ||
| 2785 | * | ||
| 2786 | * Wait until @work has finished execution. On return, it's | ||
| 2787 | * guaranteed that all queueing instances of @work which happened | ||
| 2788 | * before this function is called are finished. In other words, if | ||
| 2789 | * @work hasn't been requeued since this function was called, @work is | ||
| 2790 | * guaranteed to be idle on return. | ||
| 2791 | * | ||
| 2792 | * RETURNS: | ||
| 2793 | * %true if flush_work_sync() waited for the work to finish execution, | ||
| 2794 | * %false if it was already idle. | ||
| 2795 | */ | ||
| 2796 | bool flush_work_sync(struct work_struct *work) | ||
| 2797 | { | ||
| 2798 | struct wq_barrier barr; | ||
| 2799 | bool pending, waited; | ||
| 2800 | |||
| 2801 | /* we'll wait for executions separately, queue barr only if pending */ | ||
| 2802 | pending = start_flush_work(work, &barr, false); | ||
| 2803 | |||
| 2804 | /* wait for executions to finish */ | ||
| 2805 | waited = wait_on_work(work); | ||
| 2806 | |||
| 2807 | /* wait for the pending one */ | ||
| 2808 | if (pending) { | ||
| 2809 | wait_for_completion(&barr.done); | ||
| 2810 | destroy_work_on_stack(&barr.work); | ||
| 2811 | } | 2884 | } |
| 2812 | |||
| 2813 | return pending || waited; | ||
| 2814 | } | ||
| 2815 | EXPORT_SYMBOL_GPL(flush_work_sync); | ||
| 2816 | |||
| 2817 | /* | ||
| 2818 | * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit, | ||
| 2819 | * so this work can't be re-armed in any way. | ||
| 2820 | */ | ||
| 2821 | static int try_to_grab_pending(struct work_struct *work) | ||
| 2822 | { | ||
| 2823 | struct global_cwq *gcwq; | ||
| 2824 | int ret = -1; | ||
| 2825 | |||
| 2826 | if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) | ||
| 2827 | return 0; | ||
| 2828 | |||
| 2829 | /* | ||
| 2830 | * The queueing is in progress, or it is already queued. Try to | ||
| 2831 | * steal it from ->worklist without clearing WORK_STRUCT_PENDING. | ||
| 2832 | */ | ||
| 2833 | gcwq = get_work_gcwq(work); | ||
| 2834 | if (!gcwq) | ||
| 2835 | return ret; | ||
| 2836 | |||
| 2837 | spin_lock_irq(&gcwq->lock); | ||
| 2838 | if (!list_empty(&work->entry)) { | ||
| 2839 | /* | ||
| 2840 | * This work is queued, but perhaps we locked the wrong gcwq. | ||
| 2841 | * In that case we must see the new value after rmb(), see | ||
| 2842 | * insert_work()->wmb(). | ||
| 2843 | */ | ||
| 2844 | smp_rmb(); | ||
| 2845 | if (gcwq == get_work_gcwq(work)) { | ||
| 2846 | debug_work_deactivate(work); | ||
| 2847 | list_del_init(&work->entry); | ||
| 2848 | cwq_dec_nr_in_flight(get_work_cwq(work), | ||
| 2849 | get_work_color(work), | ||
| 2850 | *work_data_bits(work) & WORK_STRUCT_DELAYED); | ||
| 2851 | ret = 1; | ||
| 2852 | } | ||
| 2853 | } | ||
| 2854 | spin_unlock_irq(&gcwq->lock); | ||
| 2855 | |||
| 2856 | return ret; | ||
| 2857 | } | 2885 | } |
| 2886 | EXPORT_SYMBOL_GPL(flush_work); | ||
| 2858 | 2887 | ||
| 2859 | static bool __cancel_work_timer(struct work_struct *work, | 2888 | static bool __cancel_work_timer(struct work_struct *work, bool is_dwork) |
| 2860 | struct timer_list* timer) | ||
| 2861 | { | 2889 | { |
| 2890 | unsigned long flags; | ||
| 2862 | int ret; | 2891 | int ret; |
| 2863 | 2892 | ||
| 2864 | do { | 2893 | do { |
| 2865 | ret = (timer && likely(del_timer(timer))); | 2894 | ret = try_to_grab_pending(work, is_dwork, &flags); |
| 2866 | if (!ret) | 2895 | /* |
| 2867 | ret = try_to_grab_pending(work); | 2896 | * If someone else is canceling, wait for the same event it |
| 2868 | wait_on_work(work); | 2897 | * would be waiting for before retrying. |
| 2898 | */ | ||
| 2899 | if (unlikely(ret == -ENOENT)) | ||
| 2900 | flush_work(work); | ||
| 2869 | } while (unlikely(ret < 0)); | 2901 | } while (unlikely(ret < 0)); |
| 2870 | 2902 | ||
| 2903 | /* tell other tasks trying to grab @work to back off */ | ||
| 2904 | mark_work_canceling(work); | ||
| 2905 | local_irq_restore(flags); | ||
| 2906 | |||
| 2907 | flush_work(work); | ||
| 2871 | clear_work_data(work); | 2908 | clear_work_data(work); |
| 2872 | return ret; | 2909 | return ret; |
| 2873 | } | 2910 | } |
| @@ -2892,7 +2929,7 @@ static bool __cancel_work_timer(struct work_struct *work, | |||
| 2892 | */ | 2929 | */ |
| 2893 | bool cancel_work_sync(struct work_struct *work) | 2930 | bool cancel_work_sync(struct work_struct *work) |
| 2894 | { | 2931 | { |
| 2895 | return __cancel_work_timer(work, NULL); | 2932 | return __cancel_work_timer(work, false); |
| 2896 | } | 2933 | } |
| 2897 | EXPORT_SYMBOL_GPL(cancel_work_sync); | 2934 | EXPORT_SYMBOL_GPL(cancel_work_sync); |
| 2898 | 2935 | ||
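For callers, the contract of cancel_work_sync() is unchanged by the rework above: on return the work item is neither pending nor running. A hypothetical teardown sketch (mydrv_* names and fields are illustrative):

	static void mydrv_remove(struct mydrv *md)
	{
		/* stop new submissions first (device specific), then: */
		cancel_work_sync(&md->rx_work);		/* not pending, not running */
		cancel_delayed_work_sync(&md->poll_work);
	}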
| @@ -2910,33 +2947,44 @@ EXPORT_SYMBOL_GPL(cancel_work_sync); | |||
| 2910 | */ | 2947 | */ |
| 2911 | bool flush_delayed_work(struct delayed_work *dwork) | 2948 | bool flush_delayed_work(struct delayed_work *dwork) |
| 2912 | { | 2949 | { |
| 2950 | local_irq_disable(); | ||
| 2913 | if (del_timer_sync(&dwork->timer)) | 2951 | if (del_timer_sync(&dwork->timer)) |
| 2914 | __queue_work(raw_smp_processor_id(), | 2952 | __queue_work(dwork->cpu, |
| 2915 | get_work_cwq(&dwork->work)->wq, &dwork->work); | 2953 | get_work_cwq(&dwork->work)->wq, &dwork->work); |
| 2954 | local_irq_enable(); | ||
| 2916 | return flush_work(&dwork->work); | 2955 | return flush_work(&dwork->work); |
| 2917 | } | 2956 | } |
| 2918 | EXPORT_SYMBOL(flush_delayed_work); | 2957 | EXPORT_SYMBOL(flush_delayed_work); |
| 2919 | 2958 | ||
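flush_delayed_work() now requeues to dwork->cpu, the CPU recorded when the work was queued, rather than whatever CPU happens to run the flush. The caller-visible behaviour is the same: a still-pending timer is cancelled, the work is queued immediately, and then flushed. A hypothetical suspend-path sketch (names illustrative):

	#include <linux/device.h>

	static int mydrv_suspend(struct device *dev)
	{
		struct mydrv *md = dev_get_drvdata(dev);

		/* run any pending poll now and wait for it to finish */
		flush_delayed_work(&md->poll_work);
		return 0;
	}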
| 2920 | /** | 2959 | /** |
| 2921 | * flush_delayed_work_sync - wait for a dwork to finish | 2960 | * cancel_delayed_work - cancel a delayed work |
| 2922 | * @dwork: the delayed work to flush | 2961 | * @dwork: delayed_work to cancel |
| 2923 | * | 2962 | * |
| 2924 | * Delayed timer is cancelled and the pending work is queued for | 2963 | * Kill off a pending delayed_work. Returns %true if @dwork was pending |
| 2925 | * execution immediately. Other than timer handling, its behavior | 2964 | * and canceled; %false if it wasn't pending. Note that the work callback |
| 2926 | * is identical to flush_work_sync(). | 2965 | * function may still be running on return, unless it returns %true and the |
| 2966 | * work doesn't re-arm itself. Explicitly flush or use | ||
| 2967 | * cancel_delayed_work_sync() to wait on it. | ||
| 2927 | * | 2968 | * |
| 2928 | * RETURNS: | 2969 | * This function is safe to call from any context including IRQ handler. |
| 2929 | * %true if flush_work_sync() waited for the work to finish execution, | ||
| 2930 | * %false if it was already idle. | ||
| 2931 | */ | 2970 | */ |
| 2932 | bool flush_delayed_work_sync(struct delayed_work *dwork) | 2971 | bool cancel_delayed_work(struct delayed_work *dwork) |
| 2933 | { | 2972 | { |
| 2934 | if (del_timer_sync(&dwork->timer)) | 2973 | unsigned long flags; |
| 2935 | __queue_work(raw_smp_processor_id(), | 2974 | int ret; |
| 2936 | get_work_cwq(&dwork->work)->wq, &dwork->work); | 2975 | |
| 2937 | return flush_work_sync(&dwork->work); | 2976 | do { |
| 2977 | ret = try_to_grab_pending(&dwork->work, true, &flags); | ||
| 2978 | } while (unlikely(ret == -EAGAIN)); | ||
| 2979 | |||
| 2980 | if (unlikely(ret < 0)) | ||
| 2981 | return false; | ||
| 2982 | |||
| 2983 | set_work_cpu_and_clear_pending(&dwork->work, work_cpu(&dwork->work)); | ||
| 2984 | local_irq_restore(flags); | ||
| 2985 | return true; | ||
| 2938 | } | 2986 | } |
| 2939 | EXPORT_SYMBOL(flush_delayed_work_sync); | 2987 | EXPORT_SYMBOL(cancel_delayed_work); |
| 2940 | 2988 | ||
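The new cancel_delayed_work() only loops on try_to_grab_pending() while queueing is in flight (-EAGAIN) and, as documented above, is safe from any context including IRQ handlers. A common use this enables is cancelling a timeout watchdog from the completion interrupt; a hypothetical sketch (illustrative names and fields):

	static irqreturn_t mydrv_done_irq(int irq, void *dev_id)
	{
		struct mydrv *md = dev_id;

		/* command finished before the watchdog fired */
		if (cancel_delayed_work(&md->timeout_work))
			complete(&md->cmd_done);
		return IRQ_HANDLED;
	}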
| 2941 | /** | 2989 | /** |
| 2942 | * cancel_delayed_work_sync - cancel a delayed work and wait for it to finish | 2990 | * cancel_delayed_work_sync - cancel a delayed work and wait for it to finish |
| @@ -2949,54 +2997,39 @@ EXPORT_SYMBOL(flush_delayed_work_sync); | |||
| 2949 | */ | 2997 | */ |
| 2950 | bool cancel_delayed_work_sync(struct delayed_work *dwork) | 2998 | bool cancel_delayed_work_sync(struct delayed_work *dwork) |
| 2951 | { | 2999 | { |
| 2952 | return __cancel_work_timer(&dwork->work, &dwork->timer); | 3000 | return __cancel_work_timer(&dwork->work, true); |
| 2953 | } | 3001 | } |
| 2954 | EXPORT_SYMBOL(cancel_delayed_work_sync); | 3002 | EXPORT_SYMBOL(cancel_delayed_work_sync); |
| 2955 | 3003 | ||
| 2956 | /** | 3004 | /** |
| 2957 | * schedule_work - put work task in global workqueue | ||
| 2958 | * @work: job to be done | ||
| 2959 | * | ||
| 2960 | * Returns zero if @work was already on the kernel-global workqueue and | ||
| 2961 | * non-zero otherwise. | ||
| 2962 | * | ||
| 2963 | * This puts a job in the kernel-global workqueue if it was not already | ||
| 2964 | * queued and leaves it in the same position on the kernel-global | ||
| 2965 | * workqueue otherwise. | ||
| 2966 | */ | ||
| 2967 | int schedule_work(struct work_struct *work) | ||
| 2968 | { | ||
| 2969 | return queue_work(system_wq, work); | ||
| 2970 | } | ||
| 2971 | EXPORT_SYMBOL(schedule_work); | ||
| 2972 | |||
| 2973 | /* | ||
| 2974 | * schedule_work_on - put work task on a specific cpu | 3005 | * schedule_work_on - put work task on a specific cpu |
| 2975 | * @cpu: cpu to put the work task on | 3006 | * @cpu: cpu to put the work task on |
| 2976 | * @work: job to be done | 3007 | * @work: job to be done |
| 2977 | * | 3008 | * |
| 2978 | * This puts a job on a specific CPU. | 3009 | * This puts a job on a specific CPU. |
| 2979 | */ | 3010 | */ |
| 2980 | int schedule_work_on(int cpu, struct work_struct *work) | 3011 | bool schedule_work_on(int cpu, struct work_struct *work) |
| 2981 | { | 3012 | { |
| 2982 | return queue_work_on(cpu, system_wq, work); | 3013 | return queue_work_on(cpu, system_wq, work); |
| 2983 | } | 3014 | } |
| 2984 | EXPORT_SYMBOL(schedule_work_on); | 3015 | EXPORT_SYMBOL(schedule_work_on); |
| 2985 | 3016 | ||
| 2986 | /** | 3017 | /** |
| 2987 | * schedule_delayed_work - put work task in global workqueue after delay | 3018 | * schedule_work - put work task in global workqueue |
| 2988 | * @dwork: job to be done | 3019 | * @work: job to be done |
| 2989 | * @delay: number of jiffies to wait or 0 for immediate execution | ||
| 2990 | * | 3020 | * |
| 2991 | * After waiting for a given time this puts a job in the kernel-global | 3021 | * Returns %false if @work was already on the kernel-global workqueue and |
| 2992 | * workqueue. | 3022 | * %true otherwise. |
| 3023 | * | ||
| 3024 | * This puts a job in the kernel-global workqueue if it was not already | ||
| 3025 | * queued and leaves it in the same position on the kernel-global | ||
| 3026 | * workqueue otherwise. | ||
| 2993 | */ | 3027 | */ |
| 2994 | int schedule_delayed_work(struct delayed_work *dwork, | 3028 | bool schedule_work(struct work_struct *work) |
| 2995 | unsigned long delay) | ||
| 2996 | { | 3029 | { |
| 2997 | return queue_delayed_work(system_wq, dwork, delay); | 3030 | return queue_work(system_wq, work); |
| 2998 | } | 3031 | } |
| 2999 | EXPORT_SYMBOL(schedule_delayed_work); | 3032 | EXPORT_SYMBOL(schedule_work); |
| 3000 | 3033 | ||
| 3001 | /** | 3034 | /** |
| 3002 | * schedule_delayed_work_on - queue work in global workqueue on CPU after delay | 3035 | * schedule_delayed_work_on - queue work in global workqueue on CPU after delay |
| @@ -3007,14 +3040,28 @@ EXPORT_SYMBOL(schedule_delayed_work); | |||
| 3007 | * After waiting for a given time this puts a job in the kernel-global | 3040 | * After waiting for a given time this puts a job in the kernel-global |
| 3008 | * workqueue on the specified CPU. | 3041 | * workqueue on the specified CPU. |
| 3009 | */ | 3042 | */ |
| 3010 | int schedule_delayed_work_on(int cpu, | 3043 | bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork, |
| 3011 | struct delayed_work *dwork, unsigned long delay) | 3044 | unsigned long delay) |
| 3012 | { | 3045 | { |
| 3013 | return queue_delayed_work_on(cpu, system_wq, dwork, delay); | 3046 | return queue_delayed_work_on(cpu, system_wq, dwork, delay); |
| 3014 | } | 3047 | } |
| 3015 | EXPORT_SYMBOL(schedule_delayed_work_on); | 3048 | EXPORT_SYMBOL(schedule_delayed_work_on); |
| 3016 | 3049 | ||
| 3017 | /** | 3050 | /** |
| 3051 | * schedule_delayed_work - put work task in global workqueue after delay | ||
| 3052 | * @dwork: job to be done | ||
| 3053 | * @delay: number of jiffies to wait or 0 for immediate execution | ||
| 3054 | * | ||
| 3055 | * After waiting for a given time this puts a job in the kernel-global | ||
| 3056 | * workqueue. | ||
| 3057 | */ | ||
| 3058 | bool schedule_delayed_work(struct delayed_work *dwork, unsigned long delay) | ||
| 3059 | { | ||
| 3060 | return queue_delayed_work(system_wq, dwork, delay); | ||
| 3061 | } | ||
| 3062 | EXPORT_SYMBOL(schedule_delayed_work); | ||
| 3063 | |||
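The schedule_*() wrappers now return bool instead of int: %true when the work was newly queued, %false when it was already pending. A hypothetical caller (names illustrative):

	static void mydrv_kick_poll(struct mydrv *md)
	{
		/* returns false if a poll is already scheduled */
		if (!schedule_delayed_work(&md->poll_work, msecs_to_jiffies(500)))
			pr_debug("mydrv: poll already pending\n");
	}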
| 3064 | /** | ||
| 3018 | * schedule_on_each_cpu - execute a function synchronously on each online CPU | 3065 | * schedule_on_each_cpu - execute a function synchronously on each online CPU |
| 3019 | * @func: the function to call | 3066 | * @func: the function to call |
| 3020 | * | 3067 | * |
| @@ -3161,9 +3208,8 @@ static int wq_clamp_max_active(int max_active, unsigned int flags, | |||
| 3161 | int lim = flags & WQ_UNBOUND ? WQ_UNBOUND_MAX_ACTIVE : WQ_MAX_ACTIVE; | 3208 | int lim = flags & WQ_UNBOUND ? WQ_UNBOUND_MAX_ACTIVE : WQ_MAX_ACTIVE; |
| 3162 | 3209 | ||
| 3163 | if (max_active < 1 || max_active > lim) | 3210 | if (max_active < 1 || max_active > lim) |
| 3164 | printk(KERN_WARNING "workqueue: max_active %d requested for %s " | 3211 | pr_warn("workqueue: max_active %d requested for %s is out of range, clamping between %d and %d\n", |
| 3165 | "is out of range, clamping between %d and %d\n", | 3212 | max_active, name, 1, lim); |
| 3166 | max_active, name, 1, lim); | ||
| 3167 | 3213 | ||
| 3168 | return clamp_val(max_active, 1, lim); | 3214 | return clamp_val(max_active, 1, lim); |
| 3169 | } | 3215 | } |
| @@ -3319,6 +3365,26 @@ void destroy_workqueue(struct workqueue_struct *wq) | |||
| 3319 | EXPORT_SYMBOL_GPL(destroy_workqueue); | 3365 | EXPORT_SYMBOL_GPL(destroy_workqueue); |
| 3320 | 3366 | ||
| 3321 | /** | 3367 | /** |
| 3368 | * cwq_set_max_active - adjust max_active of a cwq | ||
| 3369 | * @cwq: target cpu_workqueue_struct | ||
| 3370 | * @max_active: new max_active value. | ||
| 3371 | * | ||
| 3372 | * Set @cwq->max_active to @max_active and activate delayed works if | ||
| 3373 | * increased. | ||
| 3374 | * | ||
| 3375 | * CONTEXT: | ||
| 3376 | * spin_lock_irq(gcwq->lock). | ||
| 3377 | */ | ||
| 3378 | static void cwq_set_max_active(struct cpu_workqueue_struct *cwq, int max_active) | ||
| 3379 | { | ||
| 3380 | cwq->max_active = max_active; | ||
| 3381 | |||
| 3382 | while (!list_empty(&cwq->delayed_works) && | ||
| 3383 | cwq->nr_active < cwq->max_active) | ||
| 3384 | cwq_activate_first_delayed(cwq); | ||
| 3385 | } | ||
| 3386 | |||
| 3387 | /** | ||
| 3322 | * workqueue_set_max_active - adjust max_active of a workqueue | 3388 | * workqueue_set_max_active - adjust max_active of a workqueue |
| 3323 | * @wq: target workqueue | 3389 | * @wq: target workqueue |
| 3324 | * @max_active: new max_active value. | 3390 | * @max_active: new max_active value. |
| @@ -3345,7 +3411,7 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) | |||
| 3345 | 3411 | ||
| 3346 | if (!(wq->flags & WQ_FREEZABLE) || | 3412 | if (!(wq->flags & WQ_FREEZABLE) || |
| 3347 | !(gcwq->flags & GCWQ_FREEZING)) | 3413 | !(gcwq->flags & GCWQ_FREEZING)) |
| 3348 | get_cwq(gcwq->cpu, wq)->max_active = max_active; | 3414 | cwq_set_max_active(get_cwq(gcwq->cpu, wq), max_active); |
| 3349 | 3415 | ||
| 3350 | spin_unlock_irq(&gcwq->lock); | 3416 | spin_unlock_irq(&gcwq->lock); |
| 3351 | } | 3417 | } |
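With cwq_set_max_active() factored out, raising max_active through workqueue_set_max_active() now also activates works that were held back on the delayed list while the limit was lower. A hypothetical throttling sketch (names illustrative, not part of this patch):

	static struct workqueue_struct *mydrv_io_wq;

	static int mydrv_wq_init(void)
	{
		mydrv_io_wq = alloc_workqueue("mydrv_io", 0, 8);
		if (!mydrv_io_wq)
			return -ENOMEM;
		return 0;
	}

	static void mydrv_throttle(bool congested)
	{
		/* raising the limit re-activates any held-back (delayed) works */
		workqueue_set_max_active(mydrv_io_wq, congested ? 1 : 8);
	}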
| @@ -3440,23 +3506,23 @@ EXPORT_SYMBOL_GPL(work_busy); | |||
| 3440 | */ | 3506 | */ |
| 3441 | 3507 | ||
| 3442 | /* claim manager positions of all pools */ | 3508 | /* claim manager positions of all pools */ |
| 3443 | static void gcwq_claim_management_and_lock(struct global_cwq *gcwq) | 3509 | static void gcwq_claim_assoc_and_lock(struct global_cwq *gcwq) |
| 3444 | { | 3510 | { |
| 3445 | struct worker_pool *pool; | 3511 | struct worker_pool *pool; |
| 3446 | 3512 | ||
| 3447 | for_each_worker_pool(pool, gcwq) | 3513 | for_each_worker_pool(pool, gcwq) |
| 3448 | mutex_lock_nested(&pool->manager_mutex, pool - gcwq->pools); | 3514 | mutex_lock_nested(&pool->assoc_mutex, pool - gcwq->pools); |
| 3449 | spin_lock_irq(&gcwq->lock); | 3515 | spin_lock_irq(&gcwq->lock); |
| 3450 | } | 3516 | } |
| 3451 | 3517 | ||
| 3452 | /* release manager positions */ | 3518 | /* release manager positions */ |
| 3453 | static void gcwq_release_management_and_unlock(struct global_cwq *gcwq) | 3519 | static void gcwq_release_assoc_and_unlock(struct global_cwq *gcwq) |
| 3454 | { | 3520 | { |
| 3455 | struct worker_pool *pool; | 3521 | struct worker_pool *pool; |
| 3456 | 3522 | ||
| 3457 | spin_unlock_irq(&gcwq->lock); | 3523 | spin_unlock_irq(&gcwq->lock); |
| 3458 | for_each_worker_pool(pool, gcwq) | 3524 | for_each_worker_pool(pool, gcwq) |
| 3459 | mutex_unlock(&pool->manager_mutex); | 3525 | mutex_unlock(&pool->assoc_mutex); |
| 3460 | } | 3526 | } |
| 3461 | 3527 | ||
| 3462 | static void gcwq_unbind_fn(struct work_struct *work) | 3528 | static void gcwq_unbind_fn(struct work_struct *work) |
| @@ -3469,7 +3535,7 @@ static void gcwq_unbind_fn(struct work_struct *work) | |||
| 3469 | 3535 | ||
| 3470 | BUG_ON(gcwq->cpu != smp_processor_id()); | 3536 | BUG_ON(gcwq->cpu != smp_processor_id()); |
| 3471 | 3537 | ||
| 3472 | gcwq_claim_management_and_lock(gcwq); | 3538 | gcwq_claim_assoc_and_lock(gcwq); |
| 3473 | 3539 | ||
| 3474 | /* | 3540 | /* |
| 3475 | * We've claimed all manager positions. Make all workers unbound | 3541 | * We've claimed all manager positions. Make all workers unbound |
| @@ -3486,7 +3552,7 @@ static void gcwq_unbind_fn(struct work_struct *work) | |||
| 3486 | 3552 | ||
| 3487 | gcwq->flags |= GCWQ_DISASSOCIATED; | 3553 | gcwq->flags |= GCWQ_DISASSOCIATED; |
| 3488 | 3554 | ||
| 3489 | gcwq_release_management_and_unlock(gcwq); | 3555 | gcwq_release_assoc_and_unlock(gcwq); |
| 3490 | 3556 | ||
| 3491 | /* | 3557 | /* |
| 3492 | * Call schedule() so that we cross rq->lock and thus can guarantee | 3558 | * Call schedule() so that we cross rq->lock and thus can guarantee |
| @@ -3514,7 +3580,7 @@ static void gcwq_unbind_fn(struct work_struct *work) | |||
| 3514 | * Workqueues should be brought up before normal priority CPU notifiers. | 3580 | * Workqueues should be brought up before normal priority CPU notifiers. |
| 3515 | * This will be registered high priority CPU notifier. | 3581 | * This will be registered high priority CPU notifier. |
| 3516 | */ | 3582 | */ |
| 3517 | static int __devinit workqueue_cpu_up_callback(struct notifier_block *nfb, | 3583 | static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb, |
| 3518 | unsigned long action, | 3584 | unsigned long action, |
| 3519 | void *hcpu) | 3585 | void *hcpu) |
| 3520 | { | 3586 | { |
| @@ -3542,10 +3608,10 @@ static int __devinit workqueue_cpu_up_callback(struct notifier_block *nfb, | |||
| 3542 | 3608 | ||
| 3543 | case CPU_DOWN_FAILED: | 3609 | case CPU_DOWN_FAILED: |
| 3544 | case CPU_ONLINE: | 3610 | case CPU_ONLINE: |
| 3545 | gcwq_claim_management_and_lock(gcwq); | 3611 | gcwq_claim_assoc_and_lock(gcwq); |
| 3546 | gcwq->flags &= ~GCWQ_DISASSOCIATED; | 3612 | gcwq->flags &= ~GCWQ_DISASSOCIATED; |
| 3547 | rebind_workers(gcwq); | 3613 | rebind_workers(gcwq); |
| 3548 | gcwq_release_management_and_unlock(gcwq); | 3614 | gcwq_release_assoc_and_unlock(gcwq); |
| 3549 | break; | 3615 | break; |
| 3550 | } | 3616 | } |
| 3551 | return NOTIFY_OK; | 3617 | return NOTIFY_OK; |
| @@ -3555,7 +3621,7 @@ static int __devinit workqueue_cpu_up_callback(struct notifier_block *nfb, | |||
| 3555 | * Workqueues should be brought down after normal priority CPU notifiers. | 3621 | * Workqueues should be brought down after normal priority CPU notifiers. |
| 3556 | * This will be registered as low priority CPU notifier. | 3622 | * This will be registered as low priority CPU notifier. |
| 3557 | */ | 3623 | */ |
| 3558 | static int __devinit workqueue_cpu_down_callback(struct notifier_block *nfb, | 3624 | static int __cpuinit workqueue_cpu_down_callback(struct notifier_block *nfb, |
| 3559 | unsigned long action, | 3625 | unsigned long action, |
| 3560 | void *hcpu) | 3626 | void *hcpu) |
| 3561 | { | 3627 | { |
| @@ -3566,7 +3632,7 @@ static int __devinit workqueue_cpu_down_callback(struct notifier_block *nfb, | |||
| 3566 | case CPU_DOWN_PREPARE: | 3632 | case CPU_DOWN_PREPARE: |
| 3567 | /* unbinding should happen on the local CPU */ | 3633 | /* unbinding should happen on the local CPU */ |
| 3568 | INIT_WORK_ONSTACK(&unbind_work, gcwq_unbind_fn); | 3634 | INIT_WORK_ONSTACK(&unbind_work, gcwq_unbind_fn); |
| 3569 | schedule_work_on(cpu, &unbind_work); | 3635 | queue_work_on(cpu, system_highpri_wq, &unbind_work); |
| 3570 | flush_work(&unbind_work); | 3636 | flush_work(&unbind_work); |
| 3571 | break; | 3637 | break; |
| 3572 | } | 3638 | } |
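The unbind work is now queued with queue_work_on() on system_highpri_wq so it runs ahead of regular work on the departing CPU. The same call is available to any caller that wants a work item to start executing on a particular CPU; a hypothetical per-CPU kick (illustrative names):

	static void mydrv_kick_all_cpus(struct work_struct *percpu_works)
	{
		int cpu;

		/* percpu_works[] is assumed to hold one initialized work per CPU */
		for_each_online_cpu(cpu)
			queue_work_on(cpu, system_wq, &percpu_works[cpu]);
	}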
| @@ -3735,11 +3801,7 @@ void thaw_workqueues(void) | |||
| 3735 | continue; | 3801 | continue; |
| 3736 | 3802 | ||
| 3737 | /* restore max_active and repopulate worklist */ | 3803 | /* restore max_active and repopulate worklist */ |
| 3738 | cwq->max_active = wq->saved_max_active; | 3804 | cwq_set_max_active(cwq, wq->saved_max_active); |
| 3739 | |||
| 3740 | while (!list_empty(&cwq->delayed_works) && | ||
| 3741 | cwq->nr_active < cwq->max_active) | ||
| 3742 | cwq_activate_first_delayed(cwq); | ||
| 3743 | } | 3805 | } |
| 3744 | 3806 | ||
| 3745 | for_each_worker_pool(pool, gcwq) | 3807 | for_each_worker_pool(pool, gcwq) |
| @@ -3759,8 +3821,12 @@ static int __init init_workqueues(void) | |||
| 3759 | unsigned int cpu; | 3821 | unsigned int cpu; |
| 3760 | int i; | 3822 | int i; |
| 3761 | 3823 | ||
| 3824 | /* make sure we have enough bits for OFFQ CPU number */ | ||
| 3825 | BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_CPU_SHIFT)) < | ||
| 3826 | WORK_CPU_LAST); | ||
| 3827 | |||
| 3762 | cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP); | 3828 | cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP); |
| 3763 | cpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN); | 3829 | hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN); |
| 3764 | 3830 | ||
| 3765 | /* initialize gcwqs */ | 3831 | /* initialize gcwqs */ |
| 3766 | for_each_gcwq_cpu(cpu) { | 3832 | for_each_gcwq_cpu(cpu) { |
| @@ -3786,11 +3852,9 @@ static int __init init_workqueues(void) | |||
| 3786 | setup_timer(&pool->mayday_timer, gcwq_mayday_timeout, | 3852 | setup_timer(&pool->mayday_timer, gcwq_mayday_timeout, |
| 3787 | (unsigned long)pool); | 3853 | (unsigned long)pool); |
| 3788 | 3854 | ||
| 3789 | mutex_init(&pool->manager_mutex); | 3855 | mutex_init(&pool->assoc_mutex); |
| 3790 | ida_init(&pool->worker_ida); | 3856 | ida_init(&pool->worker_ida); |
| 3791 | } | 3857 | } |
| 3792 | |||
| 3793 | init_waitqueue_head(&gcwq->rebind_hold); | ||
| 3794 | } | 3858 | } |
| 3795 | 3859 | ||
| 3796 | /* create the initial worker */ | 3860 | /* create the initial worker */ |
| @@ -3813,17 +3877,14 @@ static int __init init_workqueues(void) | |||
| 3813 | } | 3877 | } |
| 3814 | 3878 | ||
| 3815 | system_wq = alloc_workqueue("events", 0, 0); | 3879 | system_wq = alloc_workqueue("events", 0, 0); |
| 3880 | system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0); | ||
| 3816 | system_long_wq = alloc_workqueue("events_long", 0, 0); | 3881 | system_long_wq = alloc_workqueue("events_long", 0, 0); |
| 3817 | system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0); | ||
| 3818 | system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, | 3882 | system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, |
| 3819 | WQ_UNBOUND_MAX_ACTIVE); | 3883 | WQ_UNBOUND_MAX_ACTIVE); |
| 3820 | system_freezable_wq = alloc_workqueue("events_freezable", | 3884 | system_freezable_wq = alloc_workqueue("events_freezable", |
| 3821 | WQ_FREEZABLE, 0); | 3885 | WQ_FREEZABLE, 0); |
| 3822 | system_nrt_freezable_wq = alloc_workqueue("events_nrt_freezable", | 3886 | BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq || |
| 3823 | WQ_NON_REENTRANT | WQ_FREEZABLE, 0); | 3887 | !system_unbound_wq || !system_freezable_wq); |
| 3824 | BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq || | ||
| 3825 | !system_unbound_wq || !system_freezable_wq || | ||
| 3826 | !system_nrt_freezable_wq); | ||
| 3827 | return 0; | 3888 | return 0; |
| 3828 | } | 3889 | } |
| 3829 | early_initcall(init_workqueues); | 3890 | early_initcall(init_workqueues); |
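system_nrt_wq and system_nrt_freezable_wq disappear here because non-reentrancy is now the default behaviour of every workqueue. Existing users can most likely queue to system_wq or system_freezable_wq instead; a hedged before/after sketch, not taken from this patch:

	/* before this series */
	queue_work(system_nrt_wq, &md->work);

	/* after it, reentrancy is never possible, so the plain system queue suffices */
	queue_work(system_wq, &md->work);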
