diff options
| author | H. Peter Anvin <hpa@linux.intel.com> | 2012-01-19 15:56:50 -0500 |
|---|---|---|
| committer | H. Peter Anvin <hpa@linux.intel.com> | 2012-01-19 15:56:50 -0500 |
| commit | 282f445a779ed76fca9884fe377bf56a3088b208 (patch) | |
| tree | d9abcf526baee0100672851e0a8894c19e762a39 /kernel | |
| parent | 68f30fbee19cc67849b9fa8e153ede70758afe81 (diff) | |
| parent | 90a4c0f51e8e44111a926be6f4c87af3938a79c3 (diff) | |
Merge remote-tracking branch 'linus/master' into x86/urgent
Diffstat (limited to 'kernel')
61 files changed, 2466 insertions, 1599 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index f70396e5a24b..2d9de86b7e76 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
| @@ -23,6 +23,7 @@ CFLAGS_REMOVE_irq_work.o = -pg | |||
| 23 | endif | 23 | endif |
| 24 | 24 | ||
| 25 | obj-y += sched/ | 25 | obj-y += sched/ |
| 26 | obj-y += power/ | ||
| 26 | 27 | ||
| 27 | obj-$(CONFIG_FREEZER) += freezer.o | 28 | obj-$(CONFIG_FREEZER) += freezer.o |
| 28 | obj-$(CONFIG_PROFILING) += profile.o | 29 | obj-$(CONFIG_PROFILING) += profile.o |
| @@ -52,8 +53,6 @@ obj-$(CONFIG_PROVE_LOCKING) += spinlock.o | |||
| 52 | obj-$(CONFIG_UID16) += uid16.o | 53 | obj-$(CONFIG_UID16) += uid16.o |
| 53 | obj-$(CONFIG_MODULES) += module.o | 54 | obj-$(CONFIG_MODULES) += module.o |
| 54 | obj-$(CONFIG_KALLSYMS) += kallsyms.o | 55 | obj-$(CONFIG_KALLSYMS) += kallsyms.o |
| 55 | obj-$(CONFIG_PM) += power/ | ||
| 56 | obj-$(CONFIG_FREEZER) += power/ | ||
| 57 | obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o | 56 | obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o |
| 58 | obj-$(CONFIG_KEXEC) += kexec.o | 57 | obj-$(CONFIG_KEXEC) += kexec.o |
| 59 | obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o | 58 | obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o |
diff --git a/kernel/acct.c b/kernel/acct.c index 203dfead2e06..02e6167a53b0 100644 --- a/kernel/acct.c +++ b/kernel/acct.c | |||
| @@ -84,11 +84,10 @@ static void do_acct_process(struct bsd_acct_struct *acct, | |||
| 84 | * the cache line to have the data after getting the lock. | 84 | * the cache line to have the data after getting the lock. |
| 85 | */ | 85 | */ |
| 86 | struct bsd_acct_struct { | 86 | struct bsd_acct_struct { |
| 87 | volatile int active; | 87 | int active; |
| 88 | volatile int needcheck; | 88 | unsigned long needcheck; |
| 89 | struct file *file; | 89 | struct file *file; |
| 90 | struct pid_namespace *ns; | 90 | struct pid_namespace *ns; |
| 91 | struct timer_list timer; | ||
| 92 | struct list_head list; | 91 | struct list_head list; |
| 93 | }; | 92 | }; |
| 94 | 93 | ||
| @@ -96,15 +95,6 @@ static DEFINE_SPINLOCK(acct_lock); | |||
| 96 | static LIST_HEAD(acct_list); | 95 | static LIST_HEAD(acct_list); |
| 97 | 96 | ||
| 98 | /* | 97 | /* |
| 99 | * Called whenever the timer says to check the free space. | ||
| 100 | */ | ||
| 101 | static void acct_timeout(unsigned long x) | ||
| 102 | { | ||
| 103 | struct bsd_acct_struct *acct = (struct bsd_acct_struct *)x; | ||
| 104 | acct->needcheck = 1; | ||
| 105 | } | ||
| 106 | |||
| 107 | /* | ||
| 108 | * Check the amount of free space and suspend/resume accordingly. | 98 | * Check the amount of free space and suspend/resume accordingly. |
| 109 | */ | 99 | */ |
| 110 | static int check_free_space(struct bsd_acct_struct *acct, struct file *file) | 100 | static int check_free_space(struct bsd_acct_struct *acct, struct file *file) |
| @@ -112,12 +102,12 @@ static int check_free_space(struct bsd_acct_struct *acct, struct file *file) | |||
| 112 | struct kstatfs sbuf; | 102 | struct kstatfs sbuf; |
| 113 | int res; | 103 | int res; |
| 114 | int act; | 104 | int act; |
| 115 | sector_t resume; | 105 | u64 resume; |
| 116 | sector_t suspend; | 106 | u64 suspend; |
| 117 | 107 | ||
| 118 | spin_lock(&acct_lock); | 108 | spin_lock(&acct_lock); |
| 119 | res = acct->active; | 109 | res = acct->active; |
| 120 | if (!file || !acct->needcheck) | 110 | if (!file || time_is_before_jiffies(acct->needcheck)) |
| 121 | goto out; | 111 | goto out; |
| 122 | spin_unlock(&acct_lock); | 112 | spin_unlock(&acct_lock); |
| 123 | 113 | ||
| @@ -127,8 +117,8 @@ static int check_free_space(struct bsd_acct_struct *acct, struct file *file) | |||
| 127 | suspend = sbuf.f_blocks * SUSPEND; | 117 | suspend = sbuf.f_blocks * SUSPEND; |
| 128 | resume = sbuf.f_blocks * RESUME; | 118 | resume = sbuf.f_blocks * RESUME; |
| 129 | 119 | ||
| 130 | sector_div(suspend, 100); | 120 | do_div(suspend, 100); |
| 131 | sector_div(resume, 100); | 121 | do_div(resume, 100); |
| 132 | 122 | ||
| 133 | if (sbuf.f_bavail <= suspend) | 123 | if (sbuf.f_bavail <= suspend) |
| 134 | act = -1; | 124 | act = -1; |
| @@ -160,10 +150,7 @@ static int check_free_space(struct bsd_acct_struct *acct, struct file *file) | |||
| 160 | } | 150 | } |
| 161 | } | 151 | } |
| 162 | 152 | ||
| 163 | del_timer(&acct->timer); | 153 | acct->needcheck = jiffies + ACCT_TIMEOUT*HZ; |
| 164 | acct->needcheck = 0; | ||
| 165 | acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ; | ||
| 166 | add_timer(&acct->timer); | ||
| 167 | res = acct->active; | 154 | res = acct->active; |
| 168 | out: | 155 | out: |
| 169 | spin_unlock(&acct_lock); | 156 | spin_unlock(&acct_lock); |
| @@ -185,9 +172,7 @@ static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file, | |||
| 185 | if (acct->file) { | 172 | if (acct->file) { |
| 186 | old_acct = acct->file; | 173 | old_acct = acct->file; |
| 187 | old_ns = acct->ns; | 174 | old_ns = acct->ns; |
| 188 | del_timer(&acct->timer); | ||
| 189 | acct->active = 0; | 175 | acct->active = 0; |
| 190 | acct->needcheck = 0; | ||
| 191 | acct->file = NULL; | 176 | acct->file = NULL; |
| 192 | acct->ns = NULL; | 177 | acct->ns = NULL; |
| 193 | list_del(&acct->list); | 178 | list_del(&acct->list); |
| @@ -195,13 +180,9 @@ static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file, | |||
| 195 | if (file) { | 180 | if (file) { |
| 196 | acct->file = file; | 181 | acct->file = file; |
| 197 | acct->ns = ns; | 182 | acct->ns = ns; |
| 198 | acct->needcheck = 0; | 183 | acct->needcheck = jiffies + ACCT_TIMEOUT*HZ; |
| 199 | acct->active = 1; | 184 | acct->active = 1; |
| 200 | list_add(&acct->list, &acct_list); | 185 | list_add(&acct->list, &acct_list); |
| 201 | /* It's been deleted if it was used before so this is safe */ | ||
| 202 | setup_timer(&acct->timer, acct_timeout, (unsigned long)acct); | ||
| 203 | acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ; | ||
| 204 | add_timer(&acct->timer); | ||
| 205 | } | 186 | } |
| 206 | if (old_acct) { | 187 | if (old_acct) { |
| 207 | mnt_unpin(old_acct->f_path.mnt); | 188 | mnt_unpin(old_acct->f_path.mnt); |
| @@ -334,7 +315,7 @@ void acct_auto_close(struct super_block *sb) | |||
| 334 | spin_lock(&acct_lock); | 315 | spin_lock(&acct_lock); |
| 335 | restart: | 316 | restart: |
| 336 | list_for_each_entry(acct, &acct_list, list) | 317 | list_for_each_entry(acct, &acct_list, list) |
| 337 | if (acct->file && acct->file->f_path.mnt->mnt_sb == sb) { | 318 | if (acct->file && acct->file->f_path.dentry->d_sb == sb) { |
| 338 | acct_file_reopen(acct, NULL, NULL); | 319 | acct_file_reopen(acct, NULL, NULL); |
| 339 | goto restart; | 320 | goto restart; |
| 340 | } | 321 | } |
| @@ -348,7 +329,6 @@ void acct_exit_ns(struct pid_namespace *ns) | |||
| 348 | if (acct == NULL) | 329 | if (acct == NULL) |
| 349 | return; | 330 | return; |
| 350 | 331 | ||
| 351 | del_timer_sync(&acct->timer); | ||
| 352 | spin_lock(&acct_lock); | 332 | spin_lock(&acct_lock); |
| 353 | if (acct->file != NULL) | 333 | if (acct->file != NULL) |
| 354 | acct_file_reopen(acct, NULL, NULL); | 334 | acct_file_reopen(acct, NULL, NULL); |
| @@ -498,7 +478,7 @@ static void do_acct_process(struct bsd_acct_struct *acct, | |||
| 498 | * Fill the accounting struct with the needed info as recorded | 478 | * Fill the accounting struct with the needed info as recorded |
| 499 | * by the different kernel functions. | 479 | * by the different kernel functions. |
| 500 | */ | 480 | */ |
| 501 | memset((caddr_t)&ac, 0, sizeof(acct_t)); | 481 | memset(&ac, 0, sizeof(acct_t)); |
| 502 | 482 | ||
| 503 | ac.ac_version = ACCT_VERSION | ACCT_BYTEORDER; | 483 | ac.ac_version = ACCT_VERSION | ACCT_BYTEORDER; |
| 504 | strlcpy(ac.ac_comm, current->comm, sizeof(ac.ac_comm)); | 484 | strlcpy(ac.ac_comm, current->comm, sizeof(ac.ac_comm)); |
diff --git a/kernel/async.c b/kernel/async.c index 80b74b88fefe..bd0c168a3bbe 100644 --- a/kernel/async.c +++ b/kernel/async.c | |||
| @@ -78,8 +78,6 @@ static DECLARE_WAIT_QUEUE_HEAD(async_done); | |||
| 78 | 78 | ||
| 79 | static atomic_t entry_count; | 79 | static atomic_t entry_count; |
| 80 | 80 | ||
| 81 | extern int initcall_debug; | ||
| 82 | |||
| 83 | 81 | ||
| 84 | /* | 82 | /* |
| 85 | * MUST be called with the lock held! | 83 | * MUST be called with the lock held! |
diff --git a/kernel/audit.c b/kernel/audit.c index 09fae2677a45..bb0eb5bb9a0a 100644 --- a/kernel/audit.c +++ b/kernel/audit.c | |||
| @@ -601,13 +601,13 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) | |||
| 601 | case AUDIT_TTY_SET: | 601 | case AUDIT_TTY_SET: |
| 602 | case AUDIT_TRIM: | 602 | case AUDIT_TRIM: |
| 603 | case AUDIT_MAKE_EQUIV: | 603 | case AUDIT_MAKE_EQUIV: |
| 604 | if (security_netlink_recv(skb, CAP_AUDIT_CONTROL)) | 604 | if (!capable(CAP_AUDIT_CONTROL)) |
| 605 | err = -EPERM; | 605 | err = -EPERM; |
| 606 | break; | 606 | break; |
| 607 | case AUDIT_USER: | 607 | case AUDIT_USER: |
| 608 | case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG: | 608 | case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG: |
| 609 | case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2: | 609 | case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2: |
| 610 | if (security_netlink_recv(skb, CAP_AUDIT_WRITE)) | 610 | if (!capable(CAP_AUDIT_WRITE)) |
| 611 | err = -EPERM; | 611 | err = -EPERM; |
| 612 | break; | 612 | break; |
| 613 | default: /* bad msg */ | 613 | default: /* bad msg */ |
| @@ -631,7 +631,7 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type, | |||
| 631 | } | 631 | } |
| 632 | 632 | ||
| 633 | *ab = audit_log_start(NULL, GFP_KERNEL, msg_type); | 633 | *ab = audit_log_start(NULL, GFP_KERNEL, msg_type); |
| 634 | audit_log_format(*ab, "user pid=%d uid=%u auid=%u ses=%u", | 634 | audit_log_format(*ab, "pid=%d uid=%u auid=%u ses=%u", |
| 635 | pid, uid, auid, ses); | 635 | pid, uid, auid, ses); |
| 636 | if (sid) { | 636 | if (sid) { |
| 637 | rc = security_secid_to_secctx(sid, &ctx, &len); | 637 | rc = security_secid_to_secctx(sid, &ctx, &len); |
| @@ -1260,12 +1260,13 @@ static void audit_log_vformat(struct audit_buffer *ab, const char *fmt, | |||
| 1260 | avail = audit_expand(ab, | 1260 | avail = audit_expand(ab, |
| 1261 | max_t(unsigned, AUDIT_BUFSIZ, 1+len-avail)); | 1261 | max_t(unsigned, AUDIT_BUFSIZ, 1+len-avail)); |
| 1262 | if (!avail) | 1262 | if (!avail) |
| 1263 | goto out; | 1263 | goto out_va_end; |
| 1264 | len = vsnprintf(skb_tail_pointer(skb), avail, fmt, args2); | 1264 | len = vsnprintf(skb_tail_pointer(skb), avail, fmt, args2); |
| 1265 | } | 1265 | } |
| 1266 | va_end(args2); | ||
| 1267 | if (len > 0) | 1266 | if (len > 0) |
| 1268 | skb_put(skb, len); | 1267 | skb_put(skb, len); |
| 1268 | out_va_end: | ||
| 1269 | va_end(args2); | ||
| 1269 | out: | 1270 | out: |
| 1270 | return; | 1271 | return; |
| 1271 | } | 1272 | } |
| @@ -1422,7 +1423,7 @@ void audit_log_d_path(struct audit_buffer *ab, const char *prefix, | |||
| 1422 | char *p, *pathname; | 1423 | char *p, *pathname; |
| 1423 | 1424 | ||
| 1424 | if (prefix) | 1425 | if (prefix) |
| 1425 | audit_log_format(ab, " %s", prefix); | 1426 | audit_log_format(ab, "%s", prefix); |
| 1426 | 1427 | ||
| 1427 | /* We will allow 11 spaces for ' (deleted)' to be appended */ | 1428 | /* We will allow 11 spaces for ' (deleted)' to be appended */ |
| 1428 | pathname = kmalloc(PATH_MAX+11, ab->gfp_mask); | 1429 | pathname = kmalloc(PATH_MAX+11, ab->gfp_mask); |
diff --git a/kernel/audit.h b/kernel/audit.h index 91e7071c4d2c..816766803371 100644 --- a/kernel/audit.h +++ b/kernel/audit.h | |||
| @@ -36,12 +36,8 @@ enum audit_state { | |||
| 36 | AUDIT_DISABLED, /* Do not create per-task audit_context. | 36 | AUDIT_DISABLED, /* Do not create per-task audit_context. |
| 37 | * No syscall-specific audit records can | 37 | * No syscall-specific audit records can |
| 38 | * be generated. */ | 38 | * be generated. */ |
| 39 | AUDIT_SETUP_CONTEXT, /* Create the per-task audit_context, | ||
| 40 | * but don't necessarily fill it in at | ||
| 41 | * syscall entry time (i.e., filter | ||
| 42 | * instead). */ | ||
| 43 | AUDIT_BUILD_CONTEXT, /* Create the per-task audit_context, | 39 | AUDIT_BUILD_CONTEXT, /* Create the per-task audit_context, |
| 44 | * and always fill it in at syscall | 40 | * and fill it in at syscall |
| 45 | * entry time. This makes a full | 41 | * entry time. This makes a full |
| 46 | * syscall record available if some | 42 | * syscall record available if some |
| 47 | * other part of the kernel decides it | 43 | * other part of the kernel decides it |
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index f8277c80d678..a6c3f1abd206 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c | |||
| @@ -235,13 +235,15 @@ static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule) | |||
| 235 | switch(listnr) { | 235 | switch(listnr) { |
| 236 | default: | 236 | default: |
| 237 | goto exit_err; | 237 | goto exit_err; |
| 238 | case AUDIT_FILTER_USER: | ||
| 239 | case AUDIT_FILTER_TYPE: | ||
| 240 | #ifdef CONFIG_AUDITSYSCALL | 238 | #ifdef CONFIG_AUDITSYSCALL |
| 241 | case AUDIT_FILTER_ENTRY: | 239 | case AUDIT_FILTER_ENTRY: |
| 240 | if (rule->action == AUDIT_ALWAYS) | ||
| 241 | goto exit_err; | ||
| 242 | case AUDIT_FILTER_EXIT: | 242 | case AUDIT_FILTER_EXIT: |
| 243 | case AUDIT_FILTER_TASK: | 243 | case AUDIT_FILTER_TASK: |
| 244 | #endif | 244 | #endif |
| 245 | case AUDIT_FILTER_USER: | ||
| 246 | case AUDIT_FILTER_TYPE: | ||
| 245 | ; | 247 | ; |
| 246 | } | 248 | } |
| 247 | if (unlikely(rule->action == AUDIT_POSSIBLE)) { | 249 | if (unlikely(rule->action == AUDIT_POSSIBLE)) { |
| @@ -385,7 +387,7 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule) | |||
| 385 | goto exit_free; | 387 | goto exit_free; |
| 386 | break; | 388 | break; |
| 387 | case AUDIT_FILETYPE: | 389 | case AUDIT_FILETYPE: |
| 388 | if ((f->val & ~S_IFMT) > S_IFMT) | 390 | if (f->val & ~S_IFMT) |
| 389 | goto exit_free; | 391 | goto exit_free; |
| 390 | break; | 392 | break; |
| 391 | case AUDIT_INODE: | 393 | case AUDIT_INODE: |
| @@ -459,6 +461,8 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, | |||
| 459 | case AUDIT_ARG1: | 461 | case AUDIT_ARG1: |
| 460 | case AUDIT_ARG2: | 462 | case AUDIT_ARG2: |
| 461 | case AUDIT_ARG3: | 463 | case AUDIT_ARG3: |
| 464 | case AUDIT_OBJ_UID: | ||
| 465 | case AUDIT_OBJ_GID: | ||
| 462 | break; | 466 | break; |
| 463 | case AUDIT_ARCH: | 467 | case AUDIT_ARCH: |
| 464 | entry->rule.arch_f = f; | 468 | entry->rule.arch_f = f; |
| @@ -522,7 +526,6 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, | |||
| 522 | goto exit_free; | 526 | goto exit_free; |
| 523 | break; | 527 | break; |
| 524 | case AUDIT_FILTERKEY: | 528 | case AUDIT_FILTERKEY: |
| 525 | err = -EINVAL; | ||
| 526 | if (entry->rule.filterkey || f->val > AUDIT_MAX_KEY_LEN) | 529 | if (entry->rule.filterkey || f->val > AUDIT_MAX_KEY_LEN) |
| 527 | goto exit_free; | 530 | goto exit_free; |
| 528 | str = audit_unpack_string(&bufp, &remain, f->val); | 531 | str = audit_unpack_string(&bufp, &remain, f->val); |
| @@ -536,7 +539,11 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, | |||
| 536 | goto exit_free; | 539 | goto exit_free; |
| 537 | break; | 540 | break; |
| 538 | case AUDIT_FILETYPE: | 541 | case AUDIT_FILETYPE: |
| 539 | if ((f->val & ~S_IFMT) > S_IFMT) | 542 | if (f->val & ~S_IFMT) |
| 543 | goto exit_free; | ||
| 544 | break; | ||
| 545 | case AUDIT_FIELD_COMPARE: | ||
| 546 | if (f->val > AUDIT_MAX_FIELD_COMPARE) | ||
| 540 | goto exit_free; | 547 | goto exit_free; |
| 541 | break; | 548 | break; |
| 542 | default: | 549 | default: |
diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 47b7fc1ea893..caaea6e944f8 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c | |||
| @@ -70,9 +70,15 @@ | |||
| 70 | 70 | ||
| 71 | #include "audit.h" | 71 | #include "audit.h" |
| 72 | 72 | ||
| 73 | /* flags stating the success for a syscall */ | ||
| 74 | #define AUDITSC_INVALID 0 | ||
| 75 | #define AUDITSC_SUCCESS 1 | ||
| 76 | #define AUDITSC_FAILURE 2 | ||
| 77 | |||
| 73 | /* AUDIT_NAMES is the number of slots we reserve in the audit_context | 78 | /* AUDIT_NAMES is the number of slots we reserve in the audit_context |
| 74 | * for saving names from getname(). */ | 79 | * for saving names from getname(). If we get more names we will allocate |
| 75 | #define AUDIT_NAMES 20 | 80 | * a name dynamically and also add those to the list anchored by names_list. */ |
| 81 | #define AUDIT_NAMES 5 | ||
| 76 | 82 | ||
| 77 | /* Indicates that audit should log the full pathname. */ | 83 | /* Indicates that audit should log the full pathname. */ |
| 78 | #define AUDIT_NAME_FULL -1 | 84 | #define AUDIT_NAME_FULL -1 |
| @@ -101,9 +107,8 @@ struct audit_cap_data { | |||
| 101 | * | 107 | * |
| 102 | * Further, in fs/namei.c:path_lookup() we store the inode and device. */ | 108 | * Further, in fs/namei.c:path_lookup() we store the inode and device. */ |
| 103 | struct audit_names { | 109 | struct audit_names { |
| 110 | struct list_head list; /* audit_context->names_list */ | ||
| 104 | const char *name; | 111 | const char *name; |
| 105 | int name_len; /* number of name's characters to log */ | ||
| 106 | unsigned name_put; /* call __putname() for this name */ | ||
| 107 | unsigned long ino; | 112 | unsigned long ino; |
| 108 | dev_t dev; | 113 | dev_t dev; |
| 109 | umode_t mode; | 114 | umode_t mode; |
| @@ -113,6 +118,14 @@ struct audit_names { | |||
| 113 | u32 osid; | 118 | u32 osid; |
| 114 | struct audit_cap_data fcap; | 119 | struct audit_cap_data fcap; |
| 115 | unsigned int fcap_ver; | 120 | unsigned int fcap_ver; |
| 121 | int name_len; /* number of name's characters to log */ | ||
| 122 | bool name_put; /* call __putname() for this name */ | ||
| 123 | /* | ||
| 124 | * This was an allocated audit_names and not from the array of | ||
| 125 | * names allocated in the task audit context. Thus this name | ||
| 126 | * should be freed on syscall exit | ||
| 127 | */ | ||
| 128 | bool should_free; | ||
| 116 | }; | 129 | }; |
| 117 | 130 | ||
| 118 | struct audit_aux_data { | 131 | struct audit_aux_data { |
| @@ -174,8 +187,17 @@ struct audit_context { | |||
| 174 | long return_code;/* syscall return code */ | 187 | long return_code;/* syscall return code */ |
| 175 | u64 prio; | 188 | u64 prio; |
| 176 | int return_valid; /* return code is valid */ | 189 | int return_valid; /* return code is valid */ |
| 177 | int name_count; | 190 | /* |
| 178 | struct audit_names names[AUDIT_NAMES]; | 191 | * The names_list is the list of all audit_names collected during this |
| 192 | * syscall. The first AUDIT_NAMES entries in the names_list will | ||
| 193 | * actually be from the preallocated_names array for performance | ||
| 194 | * reasons. Except during allocation they should never be referenced | ||
| 195 | * through the preallocated_names array and should only be found/used | ||
| 196 | * by running the names_list. | ||
| 197 | */ | ||
| 198 | struct audit_names preallocated_names[AUDIT_NAMES]; | ||
| 199 | int name_count; /* total records in names_list */ | ||
| 200 | struct list_head names_list; /* anchor for struct audit_names->list */ | ||
| 179 | char * filterkey; /* key for rule that triggered record */ | 201 | char * filterkey; /* key for rule that triggered record */ |
| 180 | struct path pwd; | 202 | struct path pwd; |
| 181 | struct audit_context *previous; /* For nested syscalls */ | 203 | struct audit_context *previous; /* For nested syscalls */ |
| @@ -210,12 +232,12 @@ struct audit_context { | |||
| 210 | struct { | 232 | struct { |
| 211 | uid_t uid; | 233 | uid_t uid; |
| 212 | gid_t gid; | 234 | gid_t gid; |
| 213 | mode_t mode; | 235 | umode_t mode; |
| 214 | u32 osid; | 236 | u32 osid; |
| 215 | int has_perm; | 237 | int has_perm; |
| 216 | uid_t perm_uid; | 238 | uid_t perm_uid; |
| 217 | gid_t perm_gid; | 239 | gid_t perm_gid; |
| 218 | mode_t perm_mode; | 240 | umode_t perm_mode; |
| 219 | unsigned long qbytes; | 241 | unsigned long qbytes; |
| 220 | } ipc; | 242 | } ipc; |
| 221 | struct { | 243 | struct { |
| @@ -234,7 +256,7 @@ struct audit_context { | |||
| 234 | } mq_sendrecv; | 256 | } mq_sendrecv; |
| 235 | struct { | 257 | struct { |
| 236 | int oflag; | 258 | int oflag; |
| 237 | mode_t mode; | 259 | umode_t mode; |
| 238 | struct mq_attr attr; | 260 | struct mq_attr attr; |
| 239 | } mq_open; | 261 | } mq_open; |
| 240 | struct { | 262 | struct { |
| @@ -305,21 +327,21 @@ static int audit_match_perm(struct audit_context *ctx, int mask) | |||
| 305 | } | 327 | } |
| 306 | } | 328 | } |
| 307 | 329 | ||
| 308 | static int audit_match_filetype(struct audit_context *ctx, int which) | 330 | static int audit_match_filetype(struct audit_context *ctx, int val) |
| 309 | { | 331 | { |
| 310 | unsigned index = which & ~S_IFMT; | 332 | struct audit_names *n; |
| 311 | mode_t mode = which & S_IFMT; | 333 | umode_t mode = (umode_t)val; |
| 312 | 334 | ||
| 313 | if (unlikely(!ctx)) | 335 | if (unlikely(!ctx)) |
| 314 | return 0; | 336 | return 0; |
| 315 | 337 | ||
| 316 | if (index >= ctx->name_count) | 338 | list_for_each_entry(n, &ctx->names_list, list) { |
| 317 | return 0; | 339 | if ((n->ino != -1) && |
| 318 | if (ctx->names[index].ino == -1) | 340 | ((n->mode & S_IFMT) == mode)) |
| 319 | return 0; | 341 | return 1; |
| 320 | if ((ctx->names[index].mode ^ mode) & S_IFMT) | 342 | } |
| 321 | return 0; | 343 | |
| 322 | return 1; | 344 | return 0; |
| 323 | } | 345 | } |
| 324 | 346 | ||
| 325 | /* | 347 | /* |
| @@ -441,6 +463,134 @@ static int match_tree_refs(struct audit_context *ctx, struct audit_tree *tree) | |||
| 441 | return 0; | 463 | return 0; |
| 442 | } | 464 | } |
| 443 | 465 | ||
| 466 | static int audit_compare_id(uid_t uid1, | ||
| 467 | struct audit_names *name, | ||
| 468 | unsigned long name_offset, | ||
| 469 | struct audit_field *f, | ||
| 470 | struct audit_context *ctx) | ||
| 471 | { | ||
| 472 | struct audit_names *n; | ||
| 473 | unsigned long addr; | ||
| 474 | uid_t uid2; | ||
| 475 | int rc; | ||
| 476 | |||
| 477 | BUILD_BUG_ON(sizeof(uid_t) != sizeof(gid_t)); | ||
| 478 | |||
| 479 | if (name) { | ||
| 480 | addr = (unsigned long)name; | ||
| 481 | addr += name_offset; | ||
| 482 | |||
| 483 | uid2 = *(uid_t *)addr; | ||
| 484 | rc = audit_comparator(uid1, f->op, uid2); | ||
| 485 | if (rc) | ||
| 486 | return rc; | ||
| 487 | } | ||
| 488 | |||
| 489 | if (ctx) { | ||
| 490 | list_for_each_entry(n, &ctx->names_list, list) { | ||
| 491 | addr = (unsigned long)n; | ||
| 492 | addr += name_offset; | ||
| 493 | |||
| 494 | uid2 = *(uid_t *)addr; | ||
| 495 | |||
| 496 | rc = audit_comparator(uid1, f->op, uid2); | ||
| 497 | if (rc) | ||
| 498 | return rc; | ||
| 499 | } | ||
| 500 | } | ||
| 501 | return 0; | ||
| 502 | } | ||
| 503 | |||
| 504 | static int audit_field_compare(struct task_struct *tsk, | ||
| 505 | const struct cred *cred, | ||
| 506 | struct audit_field *f, | ||
| 507 | struct audit_context *ctx, | ||
| 508 | struct audit_names *name) | ||
| 509 | { | ||
| 510 | switch (f->val) { | ||
| 511 | /* process to file object comparisons */ | ||
| 512 | case AUDIT_COMPARE_UID_TO_OBJ_UID: | ||
| 513 | return audit_compare_id(cred->uid, | ||
| 514 | name, offsetof(struct audit_names, uid), | ||
| 515 | f, ctx); | ||
| 516 | case AUDIT_COMPARE_GID_TO_OBJ_GID: | ||
| 517 | return audit_compare_id(cred->gid, | ||
| 518 | name, offsetof(struct audit_names, gid), | ||
| 519 | f, ctx); | ||
| 520 | case AUDIT_COMPARE_EUID_TO_OBJ_UID: | ||
| 521 | return audit_compare_id(cred->euid, | ||
| 522 | name, offsetof(struct audit_names, uid), | ||
| 523 | f, ctx); | ||
| 524 | case AUDIT_COMPARE_EGID_TO_OBJ_GID: | ||
| 525 | return audit_compare_id(cred->egid, | ||
| 526 | name, offsetof(struct audit_names, gid), | ||
| 527 | f, ctx); | ||
| 528 | case AUDIT_COMPARE_AUID_TO_OBJ_UID: | ||
| 529 | return audit_compare_id(tsk->loginuid, | ||
| 530 | name, offsetof(struct audit_names, uid), | ||
| 531 | f, ctx); | ||
| 532 | case AUDIT_COMPARE_SUID_TO_OBJ_UID: | ||
| 533 | return audit_compare_id(cred->suid, | ||
| 534 | name, offsetof(struct audit_names, uid), | ||
| 535 | f, ctx); | ||
| 536 | case AUDIT_COMPARE_SGID_TO_OBJ_GID: | ||
| 537 | return audit_compare_id(cred->sgid, | ||
| 538 | name, offsetof(struct audit_names, gid), | ||
| 539 | f, ctx); | ||
| 540 | case AUDIT_COMPARE_FSUID_TO_OBJ_UID: | ||
| 541 | return audit_compare_id(cred->fsuid, | ||
| 542 | name, offsetof(struct audit_names, uid), | ||
| 543 | f, ctx); | ||
| 544 | case AUDIT_COMPARE_FSGID_TO_OBJ_GID: | ||
| 545 | return audit_compare_id(cred->fsgid, | ||
| 546 | name, offsetof(struct audit_names, gid), | ||
| 547 | f, ctx); | ||
| 548 | /* uid comparisons */ | ||
| 549 | case AUDIT_COMPARE_UID_TO_AUID: | ||
| 550 | return audit_comparator(cred->uid, f->op, tsk->loginuid); | ||
| 551 | case AUDIT_COMPARE_UID_TO_EUID: | ||
| 552 | return audit_comparator(cred->uid, f->op, cred->euid); | ||
| 553 | case AUDIT_COMPARE_UID_TO_SUID: | ||
| 554 | return audit_comparator(cred->uid, f->op, cred->suid); | ||
| 555 | case AUDIT_COMPARE_UID_TO_FSUID: | ||
| 556 | return audit_comparator(cred->uid, f->op, cred->fsuid); | ||
| 557 | /* auid comparisons */ | ||
| 558 | case AUDIT_COMPARE_AUID_TO_EUID: | ||
| 559 | return audit_comparator(tsk->loginuid, f->op, cred->euid); | ||
| 560 | case AUDIT_COMPARE_AUID_TO_SUID: | ||
| 561 | return audit_comparator(tsk->loginuid, f->op, cred->suid); | ||
| 562 | case AUDIT_COMPARE_AUID_TO_FSUID: | ||
| 563 | return audit_comparator(tsk->loginuid, f->op, cred->fsuid); | ||
| 564 | /* euid comparisons */ | ||
| 565 | case AUDIT_COMPARE_EUID_TO_SUID: | ||
| 566 | return audit_comparator(cred->euid, f->op, cred->suid); | ||
| 567 | case AUDIT_COMPARE_EUID_TO_FSUID: | ||
| 568 | return audit_comparator(cred->euid, f->op, cred->fsuid); | ||
| 569 | /* suid comparisons */ | ||
| 570 | case AUDIT_COMPARE_SUID_TO_FSUID: | ||
| 571 | return audit_comparator(cred->suid, f->op, cred->fsuid); | ||
| 572 | /* gid comparisons */ | ||
| 573 | case AUDIT_COMPARE_GID_TO_EGID: | ||
| 574 | return audit_comparator(cred->gid, f->op, cred->egid); | ||
| 575 | case AUDIT_COMPARE_GID_TO_SGID: | ||
| 576 | return audit_comparator(cred->gid, f->op, cred->sgid); | ||
| 577 | case AUDIT_COMPARE_GID_TO_FSGID: | ||
| 578 | return audit_comparator(cred->gid, f->op, cred->fsgid); | ||
| 579 | /* egid comparisons */ | ||
| 580 | case AUDIT_COMPARE_EGID_TO_SGID: | ||
| 581 | return audit_comparator(cred->egid, f->op, cred->sgid); | ||
| 582 | case AUDIT_COMPARE_EGID_TO_FSGID: | ||
| 583 | return audit_comparator(cred->egid, f->op, cred->fsgid); | ||
| 584 | /* sgid comparison */ | ||
| 585 | case AUDIT_COMPARE_SGID_TO_FSGID: | ||
| 586 | return audit_comparator(cred->sgid, f->op, cred->fsgid); | ||
| 587 | default: | ||
| 588 | WARN(1, "Missing AUDIT_COMPARE define. Report as a bug\n"); | ||
| 589 | return 0; | ||
| 590 | } | ||
| 591 | return 0; | ||
| 592 | } | ||
| 593 | |||
| 444 | /* Determine if any context name data matches a rule's watch data */ | 594 | /* Determine if any context name data matches a rule's watch data */ |
| 445 | /* Compare a task_struct with an audit_rule. Return 1 on match, 0 | 595 | /* Compare a task_struct with an audit_rule. Return 1 on match, 0 |
| 446 | * otherwise. | 596 | * otherwise. |
| @@ -457,13 +607,14 @@ static int audit_filter_rules(struct task_struct *tsk, | |||
| 457 | bool task_creation) | 607 | bool task_creation) |
| 458 | { | 608 | { |
| 459 | const struct cred *cred; | 609 | const struct cred *cred; |
| 460 | int i, j, need_sid = 1; | 610 | int i, need_sid = 1; |
| 461 | u32 sid; | 611 | u32 sid; |
| 462 | 612 | ||
| 463 | cred = rcu_dereference_check(tsk->cred, tsk == current || task_creation); | 613 | cred = rcu_dereference_check(tsk->cred, tsk == current || task_creation); |
| 464 | 614 | ||
| 465 | for (i = 0; i < rule->field_count; i++) { | 615 | for (i = 0; i < rule->field_count; i++) { |
| 466 | struct audit_field *f = &rule->fields[i]; | 616 | struct audit_field *f = &rule->fields[i]; |
| 617 | struct audit_names *n; | ||
| 467 | int result = 0; | 618 | int result = 0; |
| 468 | 619 | ||
| 469 | switch (f->type) { | 620 | switch (f->type) { |
| @@ -522,12 +673,14 @@ static int audit_filter_rules(struct task_struct *tsk, | |||
| 522 | } | 673 | } |
| 523 | break; | 674 | break; |
| 524 | case AUDIT_DEVMAJOR: | 675 | case AUDIT_DEVMAJOR: |
| 525 | if (name) | 676 | if (name) { |
| 526 | result = audit_comparator(MAJOR(name->dev), | 677 | if (audit_comparator(MAJOR(name->dev), f->op, f->val) || |
| 527 | f->op, f->val); | 678 | audit_comparator(MAJOR(name->rdev), f->op, f->val)) |
| 528 | else if (ctx) { | 679 | ++result; |
| 529 | for (j = 0; j < ctx->name_count; j++) { | 680 | } else if (ctx) { |
| 530 | if (audit_comparator(MAJOR(ctx->names[j].dev), f->op, f->val)) { | 681 | list_for_each_entry(n, &ctx->names_list, list) { |
| 682 | if (audit_comparator(MAJOR(n->dev), f->op, f->val) || | ||
| 683 | audit_comparator(MAJOR(n->rdev), f->op, f->val)) { | ||
| 531 | ++result; | 684 | ++result; |
| 532 | break; | 685 | break; |
| 533 | } | 686 | } |
| @@ -535,12 +688,14 @@ static int audit_filter_rules(struct task_struct *tsk, | |||
| 535 | } | 688 | } |
| 536 | break; | 689 | break; |
| 537 | case AUDIT_DEVMINOR: | 690 | case AUDIT_DEVMINOR: |
| 538 | if (name) | 691 | if (name) { |
| 539 | result = audit_comparator(MINOR(name->dev), | 692 | if (audit_comparator(MINOR(name->dev), f->op, f->val) || |
| 540 | f->op, f->val); | 693 | audit_comparator(MINOR(name->rdev), f->op, f->val)) |
| 541 | else if (ctx) { | 694 | ++result; |
| 542 | for (j = 0; j < ctx->name_count; j++) { | 695 | } else if (ctx) { |
| 543 | if (audit_comparator(MINOR(ctx->names[j].dev), f->op, f->val)) { | 696 | list_for_each_entry(n, &ctx->names_list, list) { |
| 697 | if (audit_comparator(MINOR(n->dev), f->op, f->val) || | ||
| 698 | audit_comparator(MINOR(n->rdev), f->op, f->val)) { | ||
| 544 | ++result; | 699 | ++result; |
| 545 | break; | 700 | break; |
| 546 | } | 701 | } |
| @@ -551,8 +706,32 @@ static int audit_filter_rules(struct task_struct *tsk, | |||
| 551 | if (name) | 706 | if (name) |
| 552 | result = (name->ino == f->val); | 707 | result = (name->ino == f->val); |
| 553 | else if (ctx) { | 708 | else if (ctx) { |
| 554 | for (j = 0; j < ctx->name_count; j++) { | 709 | list_for_each_entry(n, &ctx->names_list, list) { |
| 555 | if (audit_comparator(ctx->names[j].ino, f->op, f->val)) { | 710 | if (audit_comparator(n->ino, f->op, f->val)) { |
| 711 | ++result; | ||
| 712 | break; | ||
| 713 | } | ||
| 714 | } | ||
| 715 | } | ||
| 716 | break; | ||
| 717 | case AUDIT_OBJ_UID: | ||
| 718 | if (name) { | ||
| 719 | result = audit_comparator(name->uid, f->op, f->val); | ||
| 720 | } else if (ctx) { | ||
| 721 | list_for_each_entry(n, &ctx->names_list, list) { | ||
| 722 | if (audit_comparator(n->uid, f->op, f->val)) { | ||
| 723 | ++result; | ||
| 724 | break; | ||
| 725 | } | ||
| 726 | } | ||
| 727 | } | ||
| 728 | break; | ||
| 729 | case AUDIT_OBJ_GID: | ||
| 730 | if (name) { | ||
| 731 | result = audit_comparator(name->gid, f->op, f->val); | ||
| 732 | } else if (ctx) { | ||
| 733 | list_for_each_entry(n, &ctx->names_list, list) { | ||
| 734 | if (audit_comparator(n->gid, f->op, f->val)) { | ||
| 556 | ++result; | 735 | ++result; |
| 557 | break; | 736 | break; |
| 558 | } | 737 | } |
| @@ -607,11 +786,10 @@ static int audit_filter_rules(struct task_struct *tsk, | |||
| 607 | name->osid, f->type, f->op, | 786 | name->osid, f->type, f->op, |
| 608 | f->lsm_rule, ctx); | 787 | f->lsm_rule, ctx); |
| 609 | } else if (ctx) { | 788 | } else if (ctx) { |
| 610 | for (j = 0; j < ctx->name_count; j++) { | 789 | list_for_each_entry(n, &ctx->names_list, list) { |
| 611 | if (security_audit_rule_match( | 790 | if (security_audit_rule_match(n->osid, f->type, |
| 612 | ctx->names[j].osid, | 791 | f->op, f->lsm_rule, |
| 613 | f->type, f->op, | 792 | ctx)) { |
| 614 | f->lsm_rule, ctx)) { | ||
| 615 | ++result; | 793 | ++result; |
| 616 | break; | 794 | break; |
| 617 | } | 795 | } |
| @@ -643,8 +821,10 @@ static int audit_filter_rules(struct task_struct *tsk, | |||
| 643 | case AUDIT_FILETYPE: | 821 | case AUDIT_FILETYPE: |
| 644 | result = audit_match_filetype(ctx, f->val); | 822 | result = audit_match_filetype(ctx, f->val); |
| 645 | break; | 823 | break; |
| 824 | case AUDIT_FIELD_COMPARE: | ||
| 825 | result = audit_field_compare(tsk, cred, f, ctx, name); | ||
| 826 | break; | ||
| 646 | } | 827 | } |
| 647 | |||
| 648 | if (!result) | 828 | if (!result) |
| 649 | return 0; | 829 | return 0; |
| 650 | } | 830 | } |
| @@ -722,40 +902,53 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk, | |||
| 722 | return AUDIT_BUILD_CONTEXT; | 902 | return AUDIT_BUILD_CONTEXT; |
| 723 | } | 903 | } |
| 724 | 904 | ||
| 725 | /* At syscall exit time, this filter is called if any audit_names[] have been | 905 | /* |
| 906 | * Given an audit_name check the inode hash table to see if they match. | ||
| 907 | * Called holding the rcu read lock to protect the use of audit_inode_hash | ||
| 908 | */ | ||
| 909 | static int audit_filter_inode_name(struct task_struct *tsk, | ||
| 910 | struct audit_names *n, | ||
| 911 | struct audit_context *ctx) { | ||
| 912 | int word, bit; | ||
| 913 | int h = audit_hash_ino((u32)n->ino); | ||
| 914 | struct list_head *list = &audit_inode_hash[h]; | ||
| 915 | struct audit_entry *e; | ||
| 916 | enum audit_state state; | ||
| 917 | |||
| 918 | word = AUDIT_WORD(ctx->major); | ||
| 919 | bit = AUDIT_BIT(ctx->major); | ||
| 920 | |||
| 921 | if (list_empty(list)) | ||
| 922 | return 0; | ||
| 923 | |||
| 924 | list_for_each_entry_rcu(e, list, list) { | ||
| 925 | if ((e->rule.mask[word] & bit) == bit && | ||
| 926 | audit_filter_rules(tsk, &e->rule, ctx, n, &state, false)) { | ||
| 927 | ctx->current_state = state; | ||
| 928 | return 1; | ||
| 929 | } | ||
| 930 | } | ||
| 931 | |||
| 932 | return 0; | ||
| 933 | } | ||
| 934 | |||
| 935 | /* At syscall exit time, this filter is called if any audit_names have been | ||
| 726 | * collected during syscall processing. We only check rules in sublists at hash | 936 | * collected during syscall processing. We only check rules in sublists at hash |
| 727 | * buckets applicable to the inode numbers in audit_names[]. | 937 | * buckets applicable to the inode numbers in audit_names. |
| 728 | * Regarding audit_state, same rules apply as for audit_filter_syscall(). | 938 | * Regarding audit_state, same rules apply as for audit_filter_syscall(). |
| 729 | */ | 939 | */ |
| 730 | void audit_filter_inodes(struct task_struct *tsk, struct audit_context *ctx) | 940 | void audit_filter_inodes(struct task_struct *tsk, struct audit_context *ctx) |
| 731 | { | 941 | { |
| 732 | int i; | 942 | struct audit_names *n; |
| 733 | struct audit_entry *e; | ||
| 734 | enum audit_state state; | ||
| 735 | 943 | ||
| 736 | if (audit_pid && tsk->tgid == audit_pid) | 944 | if (audit_pid && tsk->tgid == audit_pid) |
| 737 | return; | 945 | return; |
| 738 | 946 | ||
| 739 | rcu_read_lock(); | 947 | rcu_read_lock(); |
| 740 | for (i = 0; i < ctx->name_count; i++) { | ||
| 741 | int word = AUDIT_WORD(ctx->major); | ||
| 742 | int bit = AUDIT_BIT(ctx->major); | ||
| 743 | struct audit_names *n = &ctx->names[i]; | ||
| 744 | int h = audit_hash_ino((u32)n->ino); | ||
| 745 | struct list_head *list = &audit_inode_hash[h]; | ||
| 746 | |||
| 747 | if (list_empty(list)) | ||
| 748 | continue; | ||
| 749 | 948 | ||
| 750 | list_for_each_entry_rcu(e, list, list) { | 949 | list_for_each_entry(n, &ctx->names_list, list) { |
| 751 | if ((e->rule.mask[word] & bit) == bit && | 950 | if (audit_filter_inode_name(tsk, n, ctx)) |
| 752 | audit_filter_rules(tsk, &e->rule, ctx, n, | 951 | break; |
| 753 | &state, false)) { | ||
| 754 | rcu_read_unlock(); | ||
| 755 | ctx->current_state = state; | ||
| 756 | return; | ||
| 757 | } | ||
| 758 | } | ||
| 759 | } | 952 | } |
| 760 | rcu_read_unlock(); | 953 | rcu_read_unlock(); |
| 761 | } | 954 | } |
| @@ -766,7 +959,7 @@ static inline struct audit_context *audit_get_context(struct task_struct *tsk, | |||
| 766 | { | 959 | { |
| 767 | struct audit_context *context = tsk->audit_context; | 960 | struct audit_context *context = tsk->audit_context; |
| 768 | 961 | ||
| 769 | if (likely(!context)) | 962 | if (!context) |
| 770 | return NULL; | 963 | return NULL; |
| 771 | context->return_valid = return_valid; | 964 | context->return_valid = return_valid; |
| 772 | 965 | ||
| @@ -799,7 +992,7 @@ static inline struct audit_context *audit_get_context(struct task_struct *tsk, | |||
| 799 | 992 | ||
| 800 | static inline void audit_free_names(struct audit_context *context) | 993 | static inline void audit_free_names(struct audit_context *context) |
| 801 | { | 994 | { |
| 802 | int i; | 995 | struct audit_names *n, *next; |
| 803 | 996 | ||
| 804 | #if AUDIT_DEBUG == 2 | 997 | #if AUDIT_DEBUG == 2 |
| 805 | if (context->put_count + context->ino_count != context->name_count) { | 998 | if (context->put_count + context->ino_count != context->name_count) { |
| @@ -810,10 +1003,9 @@ static inline void audit_free_names(struct audit_context *context) | |||
| 810 | context->serial, context->major, context->in_syscall, | 1003 | context->serial, context->major, context->in_syscall, |
| 811 | context->name_count, context->put_count, | 1004 | context->name_count, context->put_count, |
| 812 | context->ino_count); | 1005 | context->ino_count); |
| 813 | for (i = 0; i < context->name_count; i++) { | 1006 | list_for_each_entry(n, &context->names_list, list) { |
| 814 | printk(KERN_ERR "names[%d] = %p = %s\n", i, | 1007 | printk(KERN_ERR "names[%d] = %p = %s\n", i, |
| 815 | context->names[i].name, | 1008 | n->name, n->name ?: "(null)"); |
| 816 | context->names[i].name ?: "(null)"); | ||
| 817 | } | 1009 | } |
| 818 | dump_stack(); | 1010 | dump_stack(); |
| 819 | return; | 1011 | return; |
| @@ -824,9 +1016,12 @@ static inline void audit_free_names(struct audit_context *context) | |||
| 824 | context->ino_count = 0; | 1016 | context->ino_count = 0; |
| 825 | #endif | 1017 | #endif |
| 826 | 1018 | ||
| 827 | for (i = 0; i < context->name_count; i++) { | 1019 | list_for_each_entry_safe(n, next, &context->names_list, list) { |
| 828 | if (context->names[i].name && context->names[i].name_put) | 1020 | list_del(&n->list); |
| 829 | __putname(context->names[i].name); | 1021 | if (n->name && n->name_put) |
| 1022 | __putname(n->name); | ||
| 1023 | if (n->should_free) | ||
| 1024 | kfree(n); | ||
| 830 | } | 1025 | } |
| 831 | context->name_count = 0; | 1026 | context->name_count = 0; |
| 832 | path_put(&context->pwd); | 1027 | path_put(&context->pwd); |
| @@ -864,6 +1059,7 @@ static inline struct audit_context *audit_alloc_context(enum audit_state state) | |||
| 864 | return NULL; | 1059 | return NULL; |
| 865 | audit_zero_context(context, state); | 1060 | audit_zero_context(context, state); |
| 866 | INIT_LIST_HEAD(&context->killed_trees); | 1061 | INIT_LIST_HEAD(&context->killed_trees); |
| 1062 | INIT_LIST_HEAD(&context->names_list); | ||
| 867 | return context; | 1063 | return context; |
| 868 | } | 1064 | } |
| 869 | 1065 | ||
| @@ -886,7 +1082,7 @@ int audit_alloc(struct task_struct *tsk) | |||
| 886 | return 0; /* Return if not auditing. */ | 1082 | return 0; /* Return if not auditing. */ |
| 887 | 1083 | ||
| 888 | state = audit_filter_task(tsk, &key); | 1084 | state = audit_filter_task(tsk, &key); |
| 889 | if (likely(state == AUDIT_DISABLED)) | 1085 | if (state == AUDIT_DISABLED) |
| 890 | return 0; | 1086 | return 0; |
| 891 | 1087 | ||
| 892 | if (!(context = audit_alloc_context(state))) { | 1088 | if (!(context = audit_alloc_context(state))) { |
| @@ -975,7 +1171,7 @@ static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk | |||
| 975 | while (vma) { | 1171 | while (vma) { |
| 976 | if ((vma->vm_flags & VM_EXECUTABLE) && | 1172 | if ((vma->vm_flags & VM_EXECUTABLE) && |
| 977 | vma->vm_file) { | 1173 | vma->vm_file) { |
| 978 | audit_log_d_path(ab, "exe=", | 1174 | audit_log_d_path(ab, " exe=", |
| 979 | &vma->vm_file->f_path); | 1175 | &vma->vm_file->f_path); |
| 980 | break; | 1176 | break; |
| 981 | } | 1177 | } |
| @@ -1166,8 +1362,8 @@ static void audit_log_execve_info(struct audit_context *context, | |||
| 1166 | struct audit_buffer **ab, | 1362 | struct audit_buffer **ab, |
| 1167 | struct audit_aux_data_execve *axi) | 1363 | struct audit_aux_data_execve *axi) |
| 1168 | { | 1364 | { |
| 1169 | int i; | 1365 | int i, len; |
| 1170 | size_t len, len_sent = 0; | 1366 | size_t len_sent = 0; |
| 1171 | const char __user *p; | 1367 | const char __user *p; |
| 1172 | char *buf; | 1368 | char *buf; |
| 1173 | 1369 | ||
| @@ -1249,7 +1445,7 @@ static void show_special(struct audit_context *context, int *call_panic) | |||
| 1249 | case AUDIT_IPC: { | 1445 | case AUDIT_IPC: { |
| 1250 | u32 osid = context->ipc.osid; | 1446 | u32 osid = context->ipc.osid; |
| 1251 | 1447 | ||
| 1252 | audit_log_format(ab, "ouid=%u ogid=%u mode=%#o", | 1448 | audit_log_format(ab, "ouid=%u ogid=%u mode=%#ho", |
| 1253 | context->ipc.uid, context->ipc.gid, context->ipc.mode); | 1449 | context->ipc.uid, context->ipc.gid, context->ipc.mode); |
| 1254 | if (osid) { | 1450 | if (osid) { |
| 1255 | char *ctx = NULL; | 1451 | char *ctx = NULL; |
| @@ -1267,7 +1463,7 @@ static void show_special(struct audit_context *context, int *call_panic) | |||
| 1267 | ab = audit_log_start(context, GFP_KERNEL, | 1463 | ab = audit_log_start(context, GFP_KERNEL, |
| 1268 | AUDIT_IPC_SET_PERM); | 1464 | AUDIT_IPC_SET_PERM); |
| 1269 | audit_log_format(ab, | 1465 | audit_log_format(ab, |
| 1270 | "qbytes=%lx ouid=%u ogid=%u mode=%#o", | 1466 | "qbytes=%lx ouid=%u ogid=%u mode=%#ho", |
| 1271 | context->ipc.qbytes, | 1467 | context->ipc.qbytes, |
| 1272 | context->ipc.perm_uid, | 1468 | context->ipc.perm_uid, |
| 1273 | context->ipc.perm_gid, | 1469 | context->ipc.perm_gid, |
| @@ -1278,7 +1474,7 @@ static void show_special(struct audit_context *context, int *call_panic) | |||
| 1278 | break; } | 1474 | break; } |
| 1279 | case AUDIT_MQ_OPEN: { | 1475 | case AUDIT_MQ_OPEN: { |
| 1280 | audit_log_format(ab, | 1476 | audit_log_format(ab, |
| 1281 | "oflag=0x%x mode=%#o mq_flags=0x%lx mq_maxmsg=%ld " | 1477 | "oflag=0x%x mode=%#ho mq_flags=0x%lx mq_maxmsg=%ld " |
| 1282 | "mq_msgsize=%ld mq_curmsgs=%ld", | 1478 | "mq_msgsize=%ld mq_curmsgs=%ld", |
| 1283 | context->mq_open.oflag, context->mq_open.mode, | 1479 | context->mq_open.oflag, context->mq_open.mode, |
| 1284 | context->mq_open.attr.mq_flags, | 1480 | context->mq_open.attr.mq_flags, |
| @@ -1324,6 +1520,68 @@ static void show_special(struct audit_context *context, int *call_panic) | |||
| 1324 | audit_log_end(ab); | 1520 | audit_log_end(ab); |
| 1325 | } | 1521 | } |
| 1326 | 1522 | ||
| 1523 | static void audit_log_name(struct audit_context *context, struct audit_names *n, | ||
| 1524 | int record_num, int *call_panic) | ||
| 1525 | { | ||
| 1526 | struct audit_buffer *ab; | ||
| 1527 | ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH); | ||
| 1528 | if (!ab) | ||
| 1529 | return; /* audit_panic has been called */ | ||
| 1530 | |||
| 1531 | audit_log_format(ab, "item=%d", record_num); | ||
| 1532 | |||
| 1533 | if (n->name) { | ||
| 1534 | switch (n->name_len) { | ||
| 1535 | case AUDIT_NAME_FULL: | ||
| 1536 | /* log the full path */ | ||
| 1537 | audit_log_format(ab, " name="); | ||
| 1538 | audit_log_untrustedstring(ab, n->name); | ||
| 1539 | break; | ||
| 1540 | case 0: | ||
| 1541 | /* name was specified as a relative path and the | ||
| 1542 | * directory component is the cwd */ | ||
| 1543 | audit_log_d_path(ab, " name=", &context->pwd); | ||
| 1544 | break; | ||
| 1545 | default: | ||
| 1546 | /* log the name's directory component */ | ||
| 1547 | audit_log_format(ab, " name="); | ||
| 1548 | audit_log_n_untrustedstring(ab, n->name, | ||
| 1549 | n->name_len); | ||
| 1550 | } | ||
| 1551 | } else | ||
| 1552 | audit_log_format(ab, " name=(null)"); | ||
| 1553 | |||
| 1554 | if (n->ino != (unsigned long)-1) { | ||
| 1555 | audit_log_format(ab, " inode=%lu" | ||
| 1556 | " dev=%02x:%02x mode=%#ho" | ||
| 1557 | " ouid=%u ogid=%u rdev=%02x:%02x", | ||
| 1558 | n->ino, | ||
| 1559 | MAJOR(n->dev), | ||
| 1560 | MINOR(n->dev), | ||
| 1561 | n->mode, | ||
| 1562 | n->uid, | ||
| 1563 | n->gid, | ||
| 1564 | MAJOR(n->rdev), | ||
| 1565 | MINOR(n->rdev)); | ||
| 1566 | } | ||
| 1567 | if (n->osid != 0) { | ||
| 1568 | char *ctx = NULL; | ||
| 1569 | u32 len; | ||
| 1570 | if (security_secid_to_secctx( | ||
| 1571 | n->osid, &ctx, &len)) { | ||
| 1572 | audit_log_format(ab, " osid=%u", n->osid); | ||
| 1573 | *call_panic = 2; | ||
| 1574 | } else { | ||
| 1575 | audit_log_format(ab, " obj=%s", ctx); | ||
| 1576 | security_release_secctx(ctx, len); | ||
| 1577 | } | ||
| 1578 | } | ||
| 1579 | |||
| 1580 | audit_log_fcaps(ab, n); | ||
| 1581 | |||
| 1582 | audit_log_end(ab); | ||
| 1583 | } | ||
| 1584 | |||
| 1327 | static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) | 1585 | static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) |
| 1328 | { | 1586 | { |
| 1329 | const struct cred *cred; | 1587 | const struct cred *cred; |
| @@ -1331,6 +1589,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts | |||
| 1331 | struct audit_buffer *ab; | 1589 | struct audit_buffer *ab; |
| 1332 | struct audit_aux_data *aux; | 1590 | struct audit_aux_data *aux; |
| 1333 | const char *tty; | 1591 | const char *tty; |
| 1592 | struct audit_names *n; | ||
| 1334 | 1593 | ||
| 1335 | /* tsk == current */ | 1594 | /* tsk == current */ |
| 1336 | context->pid = tsk->pid; | 1595 | context->pid = tsk->pid; |
| @@ -1466,70 +1725,14 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts | |||
| 1466 | if (context->pwd.dentry && context->pwd.mnt) { | 1725 | if (context->pwd.dentry && context->pwd.mnt) { |
| 1467 | ab = audit_log_start(context, GFP_KERNEL, AUDIT_CWD); | 1726 | ab = audit_log_start(context, GFP_KERNEL, AUDIT_CWD); |
| 1468 | if (ab) { | 1727 | if (ab) { |
| 1469 | audit_log_d_path(ab, "cwd=", &context->pwd); | 1728 | audit_log_d_path(ab, " cwd=", &context->pwd); |
| 1470 | audit_log_end(ab); | 1729 | audit_log_end(ab); |
| 1471 | } | 1730 | } |
| 1472 | } | 1731 | } |
| 1473 | for (i = 0; i < context->name_count; i++) { | ||
| 1474 | struct audit_names *n = &context->names[i]; | ||
| 1475 | 1732 | ||
| 1476 | ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH); | 1733 | i = 0; |
| 1477 | if (!ab) | 1734 | list_for_each_entry(n, &context->names_list, list) |
| 1478 | continue; /* audit_panic has been called */ | 1735 | audit_log_name(context, n, i++, &call_panic); |
| 1479 | |||
| 1480 | audit_log_format(ab, "item=%d", i); | ||
| 1481 | |||
| 1482 | if (n->name) { | ||
| 1483 | switch(n->name_len) { | ||
| 1484 | case AUDIT_NAME_FULL: | ||
| 1485 | /* log the full path */ | ||
| 1486 | audit_log_format(ab, " name="); | ||
| 1487 | audit_log_untrustedstring(ab, n->name); | ||
| 1488 | break; | ||
| 1489 | case 0: | ||
| 1490 | /* name was specified as a relative path and the | ||
| 1491 | * directory component is the cwd */ | ||
| 1492 | audit_log_d_path(ab, "name=", &context->pwd); | ||
| 1493 | break; | ||
| 1494 | default: | ||
| 1495 | /* log the name's directory component */ | ||
| 1496 | audit_log_format(ab, " name="); | ||
| 1497 | audit_log_n_untrustedstring(ab, n->name, | ||
| 1498 | n->name_len); | ||
| 1499 | } | ||
| 1500 | } else | ||
| 1501 | audit_log_format(ab, " name=(null)"); | ||
| 1502 | |||
| 1503 | if (n->ino != (unsigned long)-1) { | ||
| 1504 | audit_log_format(ab, " inode=%lu" | ||
| 1505 | " dev=%02x:%02x mode=%#o" | ||
| 1506 | " ouid=%u ogid=%u rdev=%02x:%02x", | ||
| 1507 | n->ino, | ||
| 1508 | MAJOR(n->dev), | ||
| 1509 | MINOR(n->dev), | ||
| 1510 | n->mode, | ||
| 1511 | n->uid, | ||
| 1512 | n->gid, | ||
| 1513 | MAJOR(n->rdev), | ||
| 1514 | MINOR(n->rdev)); | ||
| 1515 | } | ||
| 1516 | if (n->osid != 0) { | ||
| 1517 | char *ctx = NULL; | ||
| 1518 | u32 len; | ||
| 1519 | if (security_secid_to_secctx( | ||
| 1520 | n->osid, &ctx, &len)) { | ||
| 1521 | audit_log_format(ab, " osid=%u", n->osid); | ||
| 1522 | call_panic = 2; | ||
| 1523 | } else { | ||
| 1524 | audit_log_format(ab, " obj=%s", ctx); | ||
| 1525 | security_release_secctx(ctx, len); | ||
| 1526 | } | ||
| 1527 | } | ||
| 1528 | |||
| 1529 | audit_log_fcaps(ab, n); | ||
| 1530 | |||
| 1531 | audit_log_end(ab); | ||
| 1532 | } | ||
| 1533 | 1736 | ||
| 1534 | /* Send end of event record to help user space know we are finished */ | 1737 | /* Send end of event record to help user space know we are finished */ |
| 1535 | ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE); | 1738 | ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE); |
| @@ -1545,12 +1748,12 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts | |||
| 1545 | * | 1748 | * |
| 1546 | * Called from copy_process and do_exit | 1749 | * Called from copy_process and do_exit |
| 1547 | */ | 1750 | */ |
| 1548 | void audit_free(struct task_struct *tsk) | 1751 | void __audit_free(struct task_struct *tsk) |
| 1549 | { | 1752 | { |
| 1550 | struct audit_context *context; | 1753 | struct audit_context *context; |
| 1551 | 1754 | ||
| 1552 | context = audit_get_context(tsk, 0, 0); | 1755 | context = audit_get_context(tsk, 0, 0); |
| 1553 | if (likely(!context)) | 1756 | if (!context) |
| 1554 | return; | 1757 | return; |
| 1555 | 1758 | ||
| 1556 | /* Check for system calls that do not go through the exit | 1759 | /* Check for system calls that do not go through the exit |
| @@ -1583,7 +1786,7 @@ void audit_free(struct task_struct *tsk) | |||
| 1583 | * will only be written if another part of the kernel requests that it | 1786 | * will only be written if another part of the kernel requests that it |
| 1584 | * be written). | 1787 | * be written). |
| 1585 | */ | 1788 | */ |
| 1586 | void audit_syscall_entry(int arch, int major, | 1789 | void __audit_syscall_entry(int arch, int major, |
| 1587 | unsigned long a1, unsigned long a2, | 1790 | unsigned long a1, unsigned long a2, |
| 1588 | unsigned long a3, unsigned long a4) | 1791 | unsigned long a3, unsigned long a4) |
| 1589 | { | 1792 | { |
| @@ -1591,7 +1794,7 @@ void audit_syscall_entry(int arch, int major, | |||
| 1591 | struct audit_context *context = tsk->audit_context; | 1794 | struct audit_context *context = tsk->audit_context; |
| 1592 | enum audit_state state; | 1795 | enum audit_state state; |
| 1593 | 1796 | ||
| 1594 | if (unlikely(!context)) | 1797 | if (!context) |
| 1595 | return; | 1798 | return; |
| 1596 | 1799 | ||
| 1597 | /* | 1800 | /* |
| @@ -1648,7 +1851,7 @@ void audit_syscall_entry(int arch, int major, | |||
| 1648 | context->prio = 0; | 1851 | context->prio = 0; |
| 1649 | state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_ENTRY]); | 1852 | state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_ENTRY]); |
| 1650 | } | 1853 | } |
| 1651 | if (likely(state == AUDIT_DISABLED)) | 1854 | if (state == AUDIT_DISABLED) |
| 1652 | return; | 1855 | return; |
| 1653 | 1856 | ||
| 1654 | context->serial = 0; | 1857 | context->serial = 0; |
| @@ -1658,30 +1861,9 @@ void audit_syscall_entry(int arch, int major, | |||
| 1658 | context->ppid = 0; | 1861 | context->ppid = 0; |
| 1659 | } | 1862 | } |
| 1660 | 1863 | ||
| 1661 | void audit_finish_fork(struct task_struct *child) | ||
| 1662 | { | ||
| 1663 | struct audit_context *ctx = current->audit_context; | ||
| 1664 | struct audit_context *p = child->audit_context; | ||
| 1665 | if (!p || !ctx) | ||
| 1666 | return; | ||
| 1667 | if (!ctx->in_syscall || ctx->current_state != AUDIT_RECORD_CONTEXT) | ||
| 1668 | return; | ||
| 1669 | p->arch = ctx->arch; | ||
| 1670 | p->major = ctx->major; | ||
| 1671 | memcpy(p->argv, ctx->argv, sizeof(ctx->argv)); | ||
| 1672 | p->ctime = ctx->ctime; | ||
| 1673 | p->dummy = ctx->dummy; | ||
| 1674 | p->in_syscall = ctx->in_syscall; | ||
| 1675 | p->filterkey = kstrdup(ctx->filterkey, GFP_KERNEL); | ||
| 1676 | p->ppid = current->pid; | ||
| 1677 | p->prio = ctx->prio; | ||
| 1678 | p->current_state = ctx->current_state; | ||
| 1679 | } | ||
| 1680 | |||
| 1681 | /** | 1864 | /** |
| 1682 | * audit_syscall_exit - deallocate audit context after a system call | 1865 | * audit_syscall_exit - deallocate audit context after a system call |
| 1683 | * @valid: success/failure flag | 1866 | * @pt_regs: syscall registers |
| 1684 | * @return_code: syscall return value | ||
| 1685 | * | 1867 | * |
| 1686 | * Tear down after system call. If the audit context has been marked as | 1868 | * Tear down after system call. If the audit context has been marked as |
| 1687 | * auditable (either because of the AUDIT_RECORD_CONTEXT state from | 1869 | * auditable (either because of the AUDIT_RECORD_CONTEXT state from |
| @@ -1689,14 +1871,18 @@ void audit_finish_fork(struct task_struct *child) | |||
| 1689 | * message), then write out the syscall information. In call cases, | 1871 | * message), then write out the syscall information. In call cases, |
| 1690 | * free the names stored from getname(). | 1872 | * free the names stored from getname(). |
| 1691 | */ | 1873 | */ |
| 1692 | void audit_syscall_exit(int valid, long return_code) | 1874 | void __audit_syscall_exit(int success, long return_code) |
| 1693 | { | 1875 | { |
| 1694 | struct task_struct *tsk = current; | 1876 | struct task_struct *tsk = current; |
| 1695 | struct audit_context *context; | 1877 | struct audit_context *context; |
| 1696 | 1878 | ||
| 1697 | context = audit_get_context(tsk, valid, return_code); | 1879 | if (success) |
| 1880 | success = AUDITSC_SUCCESS; | ||
| 1881 | else | ||
| 1882 | success = AUDITSC_FAILURE; | ||
| 1698 | 1883 | ||
| 1699 | if (likely(!context)) | 1884 | context = audit_get_context(tsk, success, return_code); |
| 1885 | if (!context) | ||
| 1700 | return; | 1886 | return; |
| 1701 | 1887 | ||
| 1702 | if (context->in_syscall && context->current_state == AUDIT_RECORD_CONTEXT) | 1888 | if (context->in_syscall && context->current_state == AUDIT_RECORD_CONTEXT) |
| @@ -1821,6 +2007,30 @@ retry: | |||
| 1821 | #endif | 2007 | #endif |
| 1822 | } | 2008 | } |
| 1823 | 2009 | ||
| 2010 | static struct audit_names *audit_alloc_name(struct audit_context *context) | ||
| 2011 | { | ||
| 2012 | struct audit_names *aname; | ||
| 2013 | |||
| 2014 | if (context->name_count < AUDIT_NAMES) { | ||
| 2015 | aname = &context->preallocated_names[context->name_count]; | ||
| 2016 | memset(aname, 0, sizeof(*aname)); | ||
| 2017 | } else { | ||
| 2018 | aname = kzalloc(sizeof(*aname), GFP_NOFS); | ||
| 2019 | if (!aname) | ||
| 2020 | return NULL; | ||
| 2021 | aname->should_free = true; | ||
| 2022 | } | ||
| 2023 | |||
| 2024 | aname->ino = (unsigned long)-1; | ||
| 2025 | list_add_tail(&aname->list, &context->names_list); | ||
| 2026 | |||
| 2027 | context->name_count++; | ||
| 2028 | #if AUDIT_DEBUG | ||
| 2029 | context->ino_count++; | ||
| 2030 | #endif | ||
| 2031 | return aname; | ||
| 2032 | } | ||
| 2033 | |||
| 1824 | /** | 2034 | /** |
| 1825 | * audit_getname - add a name to the list | 2035 | * audit_getname - add a name to the list |
| 1826 | * @name: name to add | 2036 | * @name: name to add |
| @@ -1831,9 +2041,7 @@ retry: | |||
| 1831 | void __audit_getname(const char *name) | 2041 | void __audit_getname(const char *name) |
| 1832 | { | 2042 | { |
| 1833 | struct audit_context *context = current->audit_context; | 2043 | struct audit_context *context = current->audit_context; |
| 1834 | 2044 | struct audit_names *n; | |
| 1835 | if (IS_ERR(name) || !name) | ||
| 1836 | return; | ||
| 1837 | 2045 | ||
| 1838 | if (!context->in_syscall) { | 2046 | if (!context->in_syscall) { |
| 1839 | #if AUDIT_DEBUG == 2 | 2047 | #if AUDIT_DEBUG == 2 |
| @@ -1843,13 +2051,15 @@ void __audit_getname(const char *name) | |||
| 1843 | #endif | 2051 | #endif |
| 1844 | return; | 2052 | return; |
| 1845 | } | 2053 | } |
| 1846 | BUG_ON(context->name_count >= AUDIT_NAMES); | 2054 | |
| 1847 | context->names[context->name_count].name = name; | 2055 | n = audit_alloc_name(context); |
| 1848 | context->names[context->name_count].name_len = AUDIT_NAME_FULL; | 2056 | if (!n) |
| 1849 | context->names[context->name_count].name_put = 1; | 2057 | return; |
| 1850 | context->names[context->name_count].ino = (unsigned long)-1; | 2058 | |
| 1851 | context->names[context->name_count].osid = 0; | 2059 | n->name = name; |
| 1852 | ++context->name_count; | 2060 | n->name_len = AUDIT_NAME_FULL; |
| 2061 | n->name_put = true; | ||
| 2062 | |||
| 1853 | if (!context->pwd.dentry) | 2063 | if (!context->pwd.dentry) |
| 1854 | get_fs_pwd(current->fs, &context->pwd); | 2064 | get_fs_pwd(current->fs, &context->pwd); |
| 1855 | } | 2065 | } |
| @@ -1871,12 +2081,13 @@ void audit_putname(const char *name) | |||
| 1871 | printk(KERN_ERR "%s:%d(:%d): __putname(%p)\n", | 2081 | printk(KERN_ERR "%s:%d(:%d): __putname(%p)\n", |
| 1872 | __FILE__, __LINE__, context->serial, name); | 2082 | __FILE__, __LINE__, context->serial, name); |
| 1873 | if (context->name_count) { | 2083 | if (context->name_count) { |
| 2084 | struct audit_names *n; | ||
| 1874 | int i; | 2085 | int i; |
| 1875 | for (i = 0; i < context->name_count; i++) | 2086 | |
| 2087 | list_for_each_entry(n, &context->names_list, list) | ||
| 1876 | printk(KERN_ERR "name[%d] = %p = %s\n", i, | 2088 | printk(KERN_ERR "name[%d] = %p = %s\n", i, |
| 1877 | context->names[i].name, | 2089 | n->name, n->name ?: "(null)"); |
| 1878 | context->names[i].name ?: "(null)"); | 2090 | } |
| 1879 | } | ||
| 1880 | #endif | 2091 | #endif |
| 1881 | __putname(name); | 2092 | __putname(name); |
| 1882 | } | 2093 | } |
| @@ -1897,39 +2108,11 @@ void audit_putname(const char *name) | |||
| 1897 | #endif | 2108 | #endif |
| 1898 | } | 2109 | } |
| 1899 | 2110 | ||
| 1900 | static int audit_inc_name_count(struct audit_context *context, | ||
| 1901 | const struct inode *inode) | ||
| 1902 | { | ||
| 1903 | if (context->name_count >= AUDIT_NAMES) { | ||
| 1904 | if (inode) | ||
| 1905 | printk(KERN_DEBUG "audit: name_count maxed, losing inode data: " | ||
| 1906 | "dev=%02x:%02x, inode=%lu\n", | ||
| 1907 | MAJOR(inode->i_sb->s_dev), | ||
| 1908 | MINOR(inode->i_sb->s_dev), | ||
| 1909 | inode->i_ino); | ||
| 1910 | |||
| 1911 | else | ||
| 1912 | printk(KERN_DEBUG "name_count maxed, losing inode data\n"); | ||
| 1913 | return 1; | ||
| 1914 | } | ||
| 1915 | context->name_count++; | ||
| 1916 | #if AUDIT_DEBUG | ||
| 1917 | context->ino_count++; | ||
| 1918 | #endif | ||
| 1919 | return 0; | ||
| 1920 | } | ||
| 1921 | |||
| 1922 | |||
| 1923 | static inline int audit_copy_fcaps(struct audit_names *name, const struct dentry *dentry) | 2111 | static inline int audit_copy_fcaps(struct audit_names *name, const struct dentry *dentry) |
| 1924 | { | 2112 | { |
| 1925 | struct cpu_vfs_cap_data caps; | 2113 | struct cpu_vfs_cap_data caps; |
| 1926 | int rc; | 2114 | int rc; |
| 1927 | 2115 | ||
| 1928 | memset(&name->fcap.permitted, 0, sizeof(kernel_cap_t)); | ||
| 1929 | memset(&name->fcap.inheritable, 0, sizeof(kernel_cap_t)); | ||
| 1930 | name->fcap.fE = 0; | ||
| 1931 | name->fcap_ver = 0; | ||
| 1932 | |||
| 1933 | if (!dentry) | 2116 | if (!dentry) |
| 1934 | return 0; | 2117 | return 0; |
| 1935 | 2118 | ||
| @@ -1969,30 +2152,25 @@ static void audit_copy_inode(struct audit_names *name, const struct dentry *dent | |||
| 1969 | */ | 2152 | */ |
| 1970 | void __audit_inode(const char *name, const struct dentry *dentry) | 2153 | void __audit_inode(const char *name, const struct dentry *dentry) |
| 1971 | { | 2154 | { |
| 1972 | int idx; | ||
| 1973 | struct audit_context *context = current->audit_context; | 2155 | struct audit_context *context = current->audit_context; |
| 1974 | const struct inode *inode = dentry->d_inode; | 2156 | const struct inode *inode = dentry->d_inode; |
| 2157 | struct audit_names *n; | ||
| 1975 | 2158 | ||
| 1976 | if (!context->in_syscall) | 2159 | if (!context->in_syscall) |
| 1977 | return; | 2160 | return; |
| 1978 | if (context->name_count | 2161 | |
| 1979 | && context->names[context->name_count-1].name | 2162 | list_for_each_entry_reverse(n, &context->names_list, list) { |
| 1980 | && context->names[context->name_count-1].name == name) | 2163 | if (n->name && (n->name == name)) |
| 1981 | idx = context->name_count - 1; | 2164 | goto out; |
| 1982 | else if (context->name_count > 1 | ||
| 1983 | && context->names[context->name_count-2].name | ||
| 1984 | && context->names[context->name_count-2].name == name) | ||
| 1985 | idx = context->name_count - 2; | ||
| 1986 | else { | ||
| 1987 | /* FIXME: how much do we care about inodes that have no | ||
| 1988 | * associated name? */ | ||
| 1989 | if (audit_inc_name_count(context, inode)) | ||
| 1990 | return; | ||
| 1991 | idx = context->name_count - 1; | ||
| 1992 | context->names[idx].name = NULL; | ||
| 1993 | } | 2165 | } |
| 2166 | |||
| 2167 | /* unable to find the name from a previous getname() */ | ||
| 2168 | n = audit_alloc_name(context); | ||
| 2169 | if (!n) | ||
| 2170 | return; | ||
| 2171 | out: | ||
| 1994 | handle_path(dentry); | 2172 | handle_path(dentry); |
| 1995 | audit_copy_inode(&context->names[idx], dentry, inode); | 2173 | audit_copy_inode(n, dentry, inode); |
| 1996 | } | 2174 | } |
| 1997 | 2175 | ||
| 1998 | /** | 2176 | /** |
| @@ -2011,11 +2189,11 @@ void __audit_inode(const char *name, const struct dentry *dentry) | |||
| 2011 | void __audit_inode_child(const struct dentry *dentry, | 2189 | void __audit_inode_child(const struct dentry *dentry, |
| 2012 | const struct inode *parent) | 2190 | const struct inode *parent) |
| 2013 | { | 2191 | { |
| 2014 | int idx; | ||
| 2015 | struct audit_context *context = current->audit_context; | 2192 | struct audit_context *context = current->audit_context; |
| 2016 | const char *found_parent = NULL, *found_child = NULL; | 2193 | const char *found_parent = NULL, *found_child = NULL; |
| 2017 | const struct inode *inode = dentry->d_inode; | 2194 | const struct inode *inode = dentry->d_inode; |
| 2018 | const char *dname = dentry->d_name.name; | 2195 | const char *dname = dentry->d_name.name; |
| 2196 | struct audit_names *n; | ||
| 2019 | int dirlen = 0; | 2197 | int dirlen = 0; |
| 2020 | 2198 | ||
| 2021 | if (!context->in_syscall) | 2199 | if (!context->in_syscall) |
| @@ -2025,9 +2203,7 @@ void __audit_inode_child(const struct dentry *dentry, | |||
| 2025 | handle_one(inode); | 2203 | handle_one(inode); |
| 2026 | 2204 | ||
| 2027 | /* parent is more likely, look for it first */ | 2205 | /* parent is more likely, look for it first */ |
| 2028 | for (idx = 0; idx < context->name_count; idx++) { | 2206 | list_for_each_entry(n, &context->names_list, list) { |
| 2029 | struct audit_names *n = &context->names[idx]; | ||
| 2030 | |||
| 2031 | if (!n->name) | 2207 | if (!n->name) |
| 2032 | continue; | 2208 | continue; |
| 2033 | 2209 | ||
| @@ -2040,9 +2216,7 @@ void __audit_inode_child(const struct dentry *dentry, | |||
| 2040 | } | 2216 | } |
| 2041 | 2217 | ||
| 2042 | /* no matching parent, look for matching child */ | 2218 | /* no matching parent, look for matching child */ |
| 2043 | for (idx = 0; idx < context->name_count; idx++) { | 2219 | list_for_each_entry(n, &context->names_list, list) { |
| 2044 | struct audit_names *n = &context->names[idx]; | ||
| 2045 | |||
| 2046 | if (!n->name) | 2220 | if (!n->name) |
| 2047 | continue; | 2221 | continue; |
| 2048 | 2222 | ||
| @@ -2060,34 +2234,29 @@ void __audit_inode_child(const struct dentry *dentry, | |||
| 2060 | 2234 | ||
| 2061 | add_names: | 2235 | add_names: |
| 2062 | if (!found_parent) { | 2236 | if (!found_parent) { |
| 2063 | if (audit_inc_name_count(context, parent)) | 2237 | n = audit_alloc_name(context); |
| 2238 | if (!n) | ||
| 2064 | return; | 2239 | return; |
| 2065 | idx = context->name_count - 1; | 2240 | audit_copy_inode(n, NULL, parent); |
| 2066 | context->names[idx].name = NULL; | ||
| 2067 | audit_copy_inode(&context->names[idx], NULL, parent); | ||
| 2068 | } | 2241 | } |
| 2069 | 2242 | ||
| 2070 | if (!found_child) { | 2243 | if (!found_child) { |
| 2071 | if (audit_inc_name_count(context, inode)) | 2244 | n = audit_alloc_name(context); |
| 2245 | if (!n) | ||
| 2072 | return; | 2246 | return; |
| 2073 | idx = context->name_count - 1; | ||
| 2074 | 2247 | ||
| 2075 | /* Re-use the name belonging to the slot for a matching parent | 2248 | /* Re-use the name belonging to the slot for a matching parent |
| 2076 | * directory. All names for this context are relinquished in | 2249 | * directory. All names for this context are relinquished in |
| 2077 | * audit_free_names() */ | 2250 | * audit_free_names() */ |
| 2078 | if (found_parent) { | 2251 | if (found_parent) { |
| 2079 | context->names[idx].name = found_parent; | 2252 | n->name = found_parent; |
| 2080 | context->names[idx].name_len = AUDIT_NAME_FULL; | 2253 | n->name_len = AUDIT_NAME_FULL; |
| 2081 | /* don't call __putname() */ | 2254 | /* don't call __putname() */ |
| 2082 | context->names[idx].name_put = 0; | 2255 | n->name_put = false; |
| 2083 | } else { | ||
| 2084 | context->names[idx].name = NULL; | ||
| 2085 | } | 2256 | } |
| 2086 | 2257 | ||
| 2087 | if (inode) | 2258 | if (inode) |
| 2088 | audit_copy_inode(&context->names[idx], NULL, inode); | 2259 | audit_copy_inode(n, NULL, inode); |
| 2089 | else | ||
| 2090 | context->names[idx].ino = (unsigned long)-1; | ||
| 2091 | } | 2260 | } |
| 2092 | } | 2261 | } |
| 2093 | EXPORT_SYMBOL_GPL(__audit_inode_child); | 2262 | EXPORT_SYMBOL_GPL(__audit_inode_child); |
| @@ -2121,19 +2290,28 @@ int auditsc_get_stamp(struct audit_context *ctx, | |||
| 2121 | static atomic_t session_id = ATOMIC_INIT(0); | 2290 | static atomic_t session_id = ATOMIC_INIT(0); |
| 2122 | 2291 | ||
| 2123 | /** | 2292 | /** |
| 2124 | * audit_set_loginuid - set a task's audit_context loginuid | 2293 | * audit_set_loginuid - set current task's audit_context loginuid |
| 2125 | * @task: task whose audit context is being modified | ||
| 2126 | * @loginuid: loginuid value | 2294 | * @loginuid: loginuid value |
| 2127 | * | 2295 | * |
| 2128 | * Returns 0. | 2296 | * Returns 0. |
| 2129 | * | 2297 | * |
| 2130 | * Called (set) from fs/proc/base.c::proc_loginuid_write(). | 2298 | * Called (set) from fs/proc/base.c::proc_loginuid_write(). |
| 2131 | */ | 2299 | */ |
| 2132 | int audit_set_loginuid(struct task_struct *task, uid_t loginuid) | 2300 | int audit_set_loginuid(uid_t loginuid) |
| 2133 | { | 2301 | { |
| 2134 | unsigned int sessionid = atomic_inc_return(&session_id); | 2302 | struct task_struct *task = current; |
| 2135 | struct audit_context *context = task->audit_context; | 2303 | struct audit_context *context = task->audit_context; |
| 2304 | unsigned int sessionid; | ||
| 2305 | |||
| 2306 | #ifdef CONFIG_AUDIT_LOGINUID_IMMUTABLE | ||
| 2307 | if (task->loginuid != -1) | ||
| 2308 | return -EPERM; | ||
| 2309 | #else /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */ | ||
| 2310 | if (!capable(CAP_AUDIT_CONTROL)) | ||
| 2311 | return -EPERM; | ||
| 2312 | #endif /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */ | ||
| 2136 | 2313 | ||
| 2314 | sessionid = atomic_inc_return(&session_id); | ||
| 2137 | if (context && context->in_syscall) { | 2315 | if (context && context->in_syscall) { |
| 2138 | struct audit_buffer *ab; | 2316 | struct audit_buffer *ab; |
| 2139 | 2317 | ||
| @@ -2160,7 +2338,7 @@ int audit_set_loginuid(struct task_struct *task, uid_t loginuid) | |||
| 2160 | * @attr: queue attributes | 2338 | * @attr: queue attributes |
| 2161 | * | 2339 | * |
| 2162 | */ | 2340 | */ |
| 2163 | void __audit_mq_open(int oflag, mode_t mode, struct mq_attr *attr) | 2341 | void __audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr) |
| 2164 | { | 2342 | { |
| 2165 | struct audit_context *context = current->audit_context; | 2343 | struct audit_context *context = current->audit_context; |
| 2166 | 2344 | ||
| @@ -2260,7 +2438,7 @@ void __audit_ipc_obj(struct kern_ipc_perm *ipcp) | |||
| 2260 | * | 2438 | * |
| 2261 | * Called only after audit_ipc_obj(). | 2439 | * Called only after audit_ipc_obj(). |
| 2262 | */ | 2440 | */ |
| 2263 | void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode) | 2441 | void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode) |
| 2264 | { | 2442 | { |
| 2265 | struct audit_context *context = current->audit_context; | 2443 | struct audit_context *context = current->audit_context; |
| 2266 | 2444 | ||
| @@ -2271,14 +2449,11 @@ void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mod | |||
| 2271 | context->ipc.has_perm = 1; | 2449 | context->ipc.has_perm = 1; |
| 2272 | } | 2450 | } |
| 2273 | 2451 | ||
| 2274 | int audit_bprm(struct linux_binprm *bprm) | 2452 | int __audit_bprm(struct linux_binprm *bprm) |
| 2275 | { | 2453 | { |
| 2276 | struct audit_aux_data_execve *ax; | 2454 | struct audit_aux_data_execve *ax; |
| 2277 | struct audit_context *context = current->audit_context; | 2455 | struct audit_context *context = current->audit_context; |
| 2278 | 2456 | ||
| 2279 | if (likely(!audit_enabled || !context || context->dummy)) | ||
| 2280 | return 0; | ||
| 2281 | |||
| 2282 | ax = kmalloc(sizeof(*ax), GFP_KERNEL); | 2457 | ax = kmalloc(sizeof(*ax), GFP_KERNEL); |
| 2283 | if (!ax) | 2458 | if (!ax) |
| 2284 | return -ENOMEM; | 2459 | return -ENOMEM; |
| @@ -2299,13 +2474,10 @@ int audit_bprm(struct linux_binprm *bprm) | |||
| 2299 | * @args: args array | 2474 | * @args: args array |
| 2300 | * | 2475 | * |
| 2301 | */ | 2476 | */ |
| 2302 | void audit_socketcall(int nargs, unsigned long *args) | 2477 | void __audit_socketcall(int nargs, unsigned long *args) |
| 2303 | { | 2478 | { |
| 2304 | struct audit_context *context = current->audit_context; | 2479 | struct audit_context *context = current->audit_context; |
| 2305 | 2480 | ||
| 2306 | if (likely(!context || context->dummy)) | ||
| 2307 | return; | ||
| 2308 | |||
| 2309 | context->type = AUDIT_SOCKETCALL; | 2481 | context->type = AUDIT_SOCKETCALL; |
| 2310 | context->socketcall.nargs = nargs; | 2482 | context->socketcall.nargs = nargs; |
| 2311 | memcpy(context->socketcall.args, args, nargs * sizeof(unsigned long)); | 2483 | memcpy(context->socketcall.args, args, nargs * sizeof(unsigned long)); |
| @@ -2331,13 +2503,10 @@ void __audit_fd_pair(int fd1, int fd2) | |||
| 2331 | * | 2503 | * |
| 2332 | * Returns 0 for success or NULL context or < 0 on error. | 2504 | * Returns 0 for success or NULL context or < 0 on error. |
| 2333 | */ | 2505 | */ |
| 2334 | int audit_sockaddr(int len, void *a) | 2506 | int __audit_sockaddr(int len, void *a) |
| 2335 | { | 2507 | { |
| 2336 | struct audit_context *context = current->audit_context; | 2508 | struct audit_context *context = current->audit_context; |
| 2337 | 2509 | ||
| 2338 | if (likely(!context || context->dummy)) | ||
| 2339 | return 0; | ||
| 2340 | |||
| 2341 | if (!context->sockaddr) { | 2510 | if (!context->sockaddr) { |
| 2342 | void *p = kmalloc(sizeof(struct sockaddr_storage), GFP_KERNEL); | 2511 | void *p = kmalloc(sizeof(struct sockaddr_storage), GFP_KERNEL); |
| 2343 | if (!p) | 2512 | if (!p) |
| @@ -2499,6 +2668,25 @@ void __audit_mmap_fd(int fd, int flags) | |||
| 2499 | context->type = AUDIT_MMAP; | 2668 | context->type = AUDIT_MMAP; |
| 2500 | } | 2669 | } |
| 2501 | 2670 | ||
| 2671 | static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr) | ||
| 2672 | { | ||
| 2673 | uid_t auid, uid; | ||
| 2674 | gid_t gid; | ||
| 2675 | unsigned int sessionid; | ||
| 2676 | |||
| 2677 | auid = audit_get_loginuid(current); | ||
| 2678 | sessionid = audit_get_sessionid(current); | ||
| 2679 | current_uid_gid(&uid, &gid); | ||
| 2680 | |||
| 2681 | audit_log_format(ab, "auid=%u uid=%u gid=%u ses=%u", | ||
| 2682 | auid, uid, gid, sessionid); | ||
| 2683 | audit_log_task_context(ab); | ||
| 2684 | audit_log_format(ab, " pid=%d comm=", current->pid); | ||
| 2685 | audit_log_untrustedstring(ab, current->comm); | ||
| 2686 | audit_log_format(ab, " reason="); | ||
| 2687 | audit_log_string(ab, reason); | ||
| 2688 | audit_log_format(ab, " sig=%ld", signr); | ||
| 2689 | } | ||
| 2502 | /** | 2690 | /** |
| 2503 | * audit_core_dumps - record information about processes that end abnormally | 2691 | * audit_core_dumps - record information about processes that end abnormally |
| 2504 | * @signr: signal value | 2692 | * @signr: signal value |
| @@ -2509,10 +2697,6 @@ void __audit_mmap_fd(int fd, int flags) | |||
| 2509 | void audit_core_dumps(long signr) | 2697 | void audit_core_dumps(long signr) |
| 2510 | { | 2698 | { |
| 2511 | struct audit_buffer *ab; | 2699 | struct audit_buffer *ab; |
| 2512 | u32 sid; | ||
| 2513 | uid_t auid = audit_get_loginuid(current), uid; | ||
| 2514 | gid_t gid; | ||
| 2515 | unsigned int sessionid = audit_get_sessionid(current); | ||
| 2516 | 2700 | ||
| 2517 | if (!audit_enabled) | 2701 | if (!audit_enabled) |
| 2518 | return; | 2702 | return; |
| @@ -2521,24 +2705,17 @@ void audit_core_dumps(long signr) | |||
| 2521 | return; | 2705 | return; |
| 2522 | 2706 | ||
| 2523 | ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND); | 2707 | ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND); |
| 2524 | current_uid_gid(&uid, &gid); | 2708 | audit_log_abend(ab, "memory violation", signr); |
| 2525 | audit_log_format(ab, "auid=%u uid=%u gid=%u ses=%u", | 2709 | audit_log_end(ab); |
| 2526 | auid, uid, gid, sessionid); | 2710 | } |
| 2527 | security_task_getsecid(current, &sid); | ||
| 2528 | if (sid) { | ||
| 2529 | char *ctx = NULL; | ||
| 2530 | u32 len; | ||
| 2531 | 2711 | ||
| 2532 | if (security_secid_to_secctx(sid, &ctx, &len)) | 2712 | void __audit_seccomp(unsigned long syscall) |
| 2533 | audit_log_format(ab, " ssid=%u", sid); | 2713 | { |
| 2534 | else { | 2714 | struct audit_buffer *ab; |
| 2535 | audit_log_format(ab, " subj=%s", ctx); | 2715 | |
| 2536 | security_release_secctx(ctx, len); | 2716 | ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND); |
| 2537 | } | 2717 | audit_log_abend(ab, "seccomp", SIGKILL); |
| 2538 | } | 2718 | audit_log_format(ab, " syscall=%ld", syscall); |
| 2539 | audit_log_format(ab, " pid=%d comm=", current->pid); | ||
| 2540 | audit_log_untrustedstring(ab, current->comm); | ||
| 2541 | audit_log_format(ab, " sig=%ld", signr); | ||
| 2542 | audit_log_end(ab); | 2719 | audit_log_end(ab); |
| 2543 | } | 2720 | } |
| 2544 | 2721 | ||
diff --git a/kernel/capability.c b/kernel/capability.c index b463871a4e69..3f1adb6c6470 100644 --- a/kernel/capability.c +++ b/kernel/capability.c | |||
| @@ -287,74 +287,84 @@ error: | |||
| 287 | } | 287 | } |
| 288 | 288 | ||
| 289 | /** | 289 | /** |
| 290 | * has_capability - Does a task have a capability in init_user_ns | 290 | * has_ns_capability - Does a task have a capability in a specific user ns |
| 291 | * @t: The task in question | 291 | * @t: The task in question |
| 292 | * @ns: target user namespace | ||
| 292 | * @cap: The capability to be tested for | 293 | * @cap: The capability to be tested for |
| 293 | * | 294 | * |
| 294 | * Return true if the specified task has the given superior capability | 295 | * Return true if the specified task has the given superior capability |
| 295 | * currently in effect to the initial user namespace, false if not. | 296 | * currently in effect to the specified user namespace, false if not. |
| 296 | * | 297 | * |
| 297 | * Note that this does not set PF_SUPERPRIV on the task. | 298 | * Note that this does not set PF_SUPERPRIV on the task. |
| 298 | */ | 299 | */ |
| 299 | bool has_capability(struct task_struct *t, int cap) | 300 | bool has_ns_capability(struct task_struct *t, |
| 301 | struct user_namespace *ns, int cap) | ||
| 300 | { | 302 | { |
| 301 | int ret = security_real_capable(t, &init_user_ns, cap); | 303 | int ret; |
| 304 | |||
| 305 | rcu_read_lock(); | ||
| 306 | ret = security_capable(__task_cred(t), ns, cap); | ||
| 307 | rcu_read_unlock(); | ||
| 302 | 308 | ||
| 303 | return (ret == 0); | 309 | return (ret == 0); |
| 304 | } | 310 | } |
| 305 | 311 | ||
| 306 | /** | 312 | /** |
| 307 | * has_capability - Does a task have a capability in a specific user ns | 313 | * has_capability - Does a task have a capability in init_user_ns |
| 308 | * @t: The task in question | 314 | * @t: The task in question |
| 309 | * @ns: target user namespace | ||
| 310 | * @cap: The capability to be tested for | 315 | * @cap: The capability to be tested for |
| 311 | * | 316 | * |
| 312 | * Return true if the specified task has the given superior capability | 317 | * Return true if the specified task has the given superior capability |
| 313 | * currently in effect to the specified user namespace, false if not. | 318 | * currently in effect to the initial user namespace, false if not. |
| 314 | * | 319 | * |
| 315 | * Note that this does not set PF_SUPERPRIV on the task. | 320 | * Note that this does not set PF_SUPERPRIV on the task. |
| 316 | */ | 321 | */ |
| 317 | bool has_ns_capability(struct task_struct *t, | 322 | bool has_capability(struct task_struct *t, int cap) |
| 318 | struct user_namespace *ns, int cap) | ||
| 319 | { | 323 | { |
| 320 | int ret = security_real_capable(t, ns, cap); | 324 | return has_ns_capability(t, &init_user_ns, cap); |
| 321 | |||
| 322 | return (ret == 0); | ||
| 323 | } | 325 | } |
| 324 | 326 | ||
| 325 | /** | 327 | /** |
| 326 | * has_capability_noaudit - Does a task have a capability (unaudited) | 328 | * has_ns_capability_noaudit - Does a task have a capability (unaudited) |
| 329 | * in a specific user ns. | ||
| 327 | * @t: The task in question | 330 | * @t: The task in question |
| 331 | * @ns: target user namespace | ||
| 328 | * @cap: The capability to be tested for | 332 | * @cap: The capability to be tested for |
| 329 | * | 333 | * |
| 330 | * Return true if the specified task has the given superior capability | 334 | * Return true if the specified task has the given superior capability |
| 331 | * currently in effect to init_user_ns, false if not. Don't write an | 335 | * currently in effect to the specified user namespace, false if not. |
| 332 | * audit message for the check. | 336 | * Do not write an audit message for the check. |
| 333 | * | 337 | * |
| 334 | * Note that this does not set PF_SUPERPRIV on the task. | 338 | * Note that this does not set PF_SUPERPRIV on the task. |
| 335 | */ | 339 | */ |
| 336 | bool has_capability_noaudit(struct task_struct *t, int cap) | 340 | bool has_ns_capability_noaudit(struct task_struct *t, |
| 341 | struct user_namespace *ns, int cap) | ||
| 337 | { | 342 | { |
| 338 | int ret = security_real_capable_noaudit(t, &init_user_ns, cap); | 343 | int ret; |
| 344 | |||
| 345 | rcu_read_lock(); | ||
| 346 | ret = security_capable_noaudit(__task_cred(t), ns, cap); | ||
| 347 | rcu_read_unlock(); | ||
| 339 | 348 | ||
| 340 | return (ret == 0); | 349 | return (ret == 0); |
| 341 | } | 350 | } |
| 342 | 351 | ||
| 343 | /** | 352 | /** |
| 344 | * capable - Determine if the current task has a superior capability in effect | 353 | * has_capability_noaudit - Does a task have a capability (unaudited) in the |
| 354 | * initial user ns | ||
| 355 | * @t: The task in question | ||
| 345 | * @cap: The capability to be tested for | 356 | * @cap: The capability to be tested for |
| 346 | * | 357 | * |
| 347 | * Return true if the current task has the given superior capability currently | 358 | * Return true if the specified task has the given superior capability |
| 348 | * available for use, false if not. | 359 | * currently in effect to init_user_ns, false if not. Don't write an |
| 360 | * audit message for the check. | ||
| 349 | * | 361 | * |
| 350 | * This sets PF_SUPERPRIV on the task if the capability is available on the | 362 | * Note that this does not set PF_SUPERPRIV on the task. |
| 351 | * assumption that it's about to be used. | ||
| 352 | */ | 363 | */ |
| 353 | bool capable(int cap) | 364 | bool has_capability_noaudit(struct task_struct *t, int cap) |
| 354 | { | 365 | { |
| 355 | return ns_capable(&init_user_ns, cap); | 366 | return has_ns_capability_noaudit(t, &init_user_ns, cap); |
| 356 | } | 367 | } |
| 357 | EXPORT_SYMBOL(capable); | ||
| 358 | 368 | ||
| 359 | /** | 369 | /** |
| 360 | * ns_capable - Determine if the current task has a superior capability in effect | 370 | * ns_capable - Determine if the current task has a superior capability in effect |
| @@ -374,7 +384,7 @@ bool ns_capable(struct user_namespace *ns, int cap) | |||
| 374 | BUG(); | 384 | BUG(); |
| 375 | } | 385 | } |
| 376 | 386 | ||
| 377 | if (security_capable(ns, current_cred(), cap) == 0) { | 387 | if (security_capable(current_cred(), ns, cap) == 0) { |
| 378 | current->flags |= PF_SUPERPRIV; | 388 | current->flags |= PF_SUPERPRIV; |
| 379 | return true; | 389 | return true; |
| 380 | } | 390 | } |
| @@ -383,18 +393,20 @@ bool ns_capable(struct user_namespace *ns, int cap) | |||
| 383 | EXPORT_SYMBOL(ns_capable); | 393 | EXPORT_SYMBOL(ns_capable); |
| 384 | 394 | ||
| 385 | /** | 395 | /** |
| 386 | * task_ns_capable - Determine whether current task has a superior | 396 | * capable - Determine if the current task has a superior capability in effect |
| 387 | * capability targeted at a specific task's user namespace. | 397 | * @cap: The capability to be tested for |
| 388 | * @t: The task whose user namespace is targeted. | 398 | * |
| 389 | * @cap: The capability in question. | 399 | * Return true if the current task has the given superior capability currently |
| 400 | * available for use, false if not. | ||
| 390 | * | 401 | * |
| 391 | * Return true if it does, false otherwise. | 402 | * This sets PF_SUPERPRIV on the task if the capability is available on the |
| 403 | * assumption that it's about to be used. | ||
| 392 | */ | 404 | */ |
| 393 | bool task_ns_capable(struct task_struct *t, int cap) | 405 | bool capable(int cap) |
| 394 | { | 406 | { |
| 395 | return ns_capable(task_cred_xxx(t, user)->user_ns, cap); | 407 | return ns_capable(&init_user_ns, cap); |
| 396 | } | 408 | } |
| 397 | EXPORT_SYMBOL(task_ns_capable); | 409 | EXPORT_SYMBOL(capable); |
| 398 | 410 | ||
| 399 | /** | 411 | /** |
| 400 | * nsown_capable - Check superior capability to one's own user_ns | 412 | * nsown_capable - Check superior capability to one's own user_ns |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a184470cf9b5..a5d3b5325f77 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
| @@ -63,7 +63,24 @@ | |||
| 63 | 63 | ||
| 64 | #include <linux/atomic.h> | 64 | #include <linux/atomic.h> |
| 65 | 65 | ||
| 66 | /* | ||
| 67 | * cgroup_mutex is the master lock. Any modification to cgroup or its | ||
| 68 | * hierarchy must be performed while holding it. | ||
| 69 | * | ||
| 70 | * cgroup_root_mutex nests inside cgroup_mutex and should be held to modify | ||
| 71 | * cgroupfs_root of any cgroup hierarchy - subsys list, flags, | ||
| 72 | * release_agent_path and so on. Modifying requires both cgroup_mutex and | ||
| 73 | * cgroup_root_mutex. Readers can acquire either of the two. This is to | ||
| 74 | * break the following locking order cycle. | ||
| 75 | * | ||
| 76 | * A. cgroup_mutex -> cred_guard_mutex -> s_type->i_mutex_key -> namespace_sem | ||
| 77 | * B. namespace_sem -> cgroup_mutex | ||
| 78 | * | ||
| 79 | * B happens only through cgroup_show_options() and using cgroup_root_mutex | ||
| 80 | * breaks it. | ||
| 81 | */ | ||
| 66 | static DEFINE_MUTEX(cgroup_mutex); | 82 | static DEFINE_MUTEX(cgroup_mutex); |
| 83 | static DEFINE_MUTEX(cgroup_root_mutex); | ||
| 67 | 84 | ||
| 68 | /* | 85 | /* |
| 69 | * Generate an array of cgroup subsystem pointers. At boot time, this is | 86 | * Generate an array of cgroup subsystem pointers. At boot time, this is |
| @@ -760,7 +777,7 @@ EXPORT_SYMBOL_GPL(cgroup_unlock); | |||
| 760 | * -> cgroup_mkdir. | 777 | * -> cgroup_mkdir. |
| 761 | */ | 778 | */ |
| 762 | 779 | ||
| 763 | static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode); | 780 | static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); |
| 764 | static struct dentry *cgroup_lookup(struct inode *, struct dentry *, struct nameidata *); | 781 | static struct dentry *cgroup_lookup(struct inode *, struct dentry *, struct nameidata *); |
| 765 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); | 782 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); |
| 766 | static int cgroup_populate_dir(struct cgroup *cgrp); | 783 | static int cgroup_populate_dir(struct cgroup *cgrp); |
| @@ -775,7 +792,7 @@ static struct backing_dev_info cgroup_backing_dev_info = { | |||
| 775 | static int alloc_css_id(struct cgroup_subsys *ss, | 792 | static int alloc_css_id(struct cgroup_subsys *ss, |
| 776 | struct cgroup *parent, struct cgroup *child); | 793 | struct cgroup *parent, struct cgroup *child); |
| 777 | 794 | ||
| 778 | static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb) | 795 | static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb) |
| 779 | { | 796 | { |
| 780 | struct inode *inode = new_inode(sb); | 797 | struct inode *inode = new_inode(sb); |
| 781 | 798 | ||
| @@ -921,7 +938,7 @@ static void cgroup_d_remove_dir(struct dentry *dentry) | |||
| 921 | * | 938 | * |
| 922 | * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex; | 939 | * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex; |
| 923 | */ | 940 | */ |
| 924 | DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq); | 941 | static DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq); |
| 925 | 942 | ||
| 926 | static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp) | 943 | static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp) |
| 927 | { | 944 | { |
| @@ -953,6 +970,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, | |||
| 953 | int i; | 970 | int i; |
| 954 | 971 | ||
| 955 | BUG_ON(!mutex_is_locked(&cgroup_mutex)); | 972 | BUG_ON(!mutex_is_locked(&cgroup_mutex)); |
| 973 | BUG_ON(!mutex_is_locked(&cgroup_root_mutex)); | ||
| 956 | 974 | ||
| 957 | removed_bits = root->actual_subsys_bits & ~final_bits; | 975 | removed_bits = root->actual_subsys_bits & ~final_bits; |
| 958 | added_bits = final_bits & ~root->actual_subsys_bits; | 976 | added_bits = final_bits & ~root->actual_subsys_bits; |
| @@ -1038,12 +1056,12 @@ static int rebind_subsystems(struct cgroupfs_root *root, | |||
| 1038 | return 0; | 1056 | return 0; |
| 1039 | } | 1057 | } |
| 1040 | 1058 | ||
| 1041 | static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs) | 1059 | static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry) |
| 1042 | { | 1060 | { |
| 1043 | struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info; | 1061 | struct cgroupfs_root *root = dentry->d_sb->s_fs_info; |
| 1044 | struct cgroup_subsys *ss; | 1062 | struct cgroup_subsys *ss; |
| 1045 | 1063 | ||
| 1046 | mutex_lock(&cgroup_mutex); | 1064 | mutex_lock(&cgroup_root_mutex); |
| 1047 | for_each_subsys(root, ss) | 1065 | for_each_subsys(root, ss) |
| 1048 | seq_printf(seq, ",%s", ss->name); | 1066 | seq_printf(seq, ",%s", ss->name); |
| 1049 | if (test_bit(ROOT_NOPREFIX, &root->flags)) | 1067 | if (test_bit(ROOT_NOPREFIX, &root->flags)) |
| @@ -1054,7 +1072,7 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
| 1054 | seq_puts(seq, ",clone_children"); | 1072 | seq_puts(seq, ",clone_children"); |
| 1055 | if (strlen(root->name)) | 1073 | if (strlen(root->name)) |
| 1056 | seq_printf(seq, ",name=%s", root->name); | 1074 | seq_printf(seq, ",name=%s", root->name); |
| 1057 | mutex_unlock(&cgroup_mutex); | 1075 | mutex_unlock(&cgroup_root_mutex); |
| 1058 | return 0; | 1076 | return 0; |
| 1059 | } | 1077 | } |
| 1060 | 1078 | ||
| @@ -1175,10 +1193,10 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
| 1175 | 1193 | ||
| 1176 | /* | 1194 | /* |
| 1177 | * If the 'all' option was specified select all the subsystems, | 1195 | * If the 'all' option was specified select all the subsystems, |
| 1178 | * otherwise 'all, 'none' and a subsystem name options were not | 1196 | * otherwise if 'none', 'name=' and a subsystem name options |
| 1179 | * specified, let's default to 'all' | 1197 | * were not specified, let's default to 'all' |
| 1180 | */ | 1198 | */ |
| 1181 | if (all_ss || (!all_ss && !one_ss && !opts->none)) { | 1199 | if (all_ss || (!one_ss && !opts->none && !opts->name)) { |
| 1182 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 1200 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
| 1183 | struct cgroup_subsys *ss = subsys[i]; | 1201 | struct cgroup_subsys *ss = subsys[i]; |
| 1184 | if (ss == NULL) | 1202 | if (ss == NULL) |
| @@ -1269,6 +1287,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
| 1269 | 1287 | ||
| 1270 | mutex_lock(&cgrp->dentry->d_inode->i_mutex); | 1288 | mutex_lock(&cgrp->dentry->d_inode->i_mutex); |
| 1271 | mutex_lock(&cgroup_mutex); | 1289 | mutex_lock(&cgroup_mutex); |
| 1290 | mutex_lock(&cgroup_root_mutex); | ||
| 1272 | 1291 | ||
| 1273 | /* See what subsystems are wanted */ | 1292 | /* See what subsystems are wanted */ |
| 1274 | ret = parse_cgroupfs_options(data, &opts); | 1293 | ret = parse_cgroupfs_options(data, &opts); |
| @@ -1297,6 +1316,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
| 1297 | out_unlock: | 1316 | out_unlock: |
| 1298 | kfree(opts.release_agent); | 1317 | kfree(opts.release_agent); |
| 1299 | kfree(opts.name); | 1318 | kfree(opts.name); |
| 1319 | mutex_unlock(&cgroup_root_mutex); | ||
| 1300 | mutex_unlock(&cgroup_mutex); | 1320 | mutex_unlock(&cgroup_mutex); |
| 1301 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); | 1321 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); |
| 1302 | return ret; | 1322 | return ret; |
| @@ -1481,6 +1501,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1481 | int ret = 0; | 1501 | int ret = 0; |
| 1482 | struct super_block *sb; | 1502 | struct super_block *sb; |
| 1483 | struct cgroupfs_root *new_root; | 1503 | struct cgroupfs_root *new_root; |
| 1504 | struct inode *inode; | ||
| 1484 | 1505 | ||
| 1485 | /* First find the desired set of subsystems */ | 1506 | /* First find the desired set of subsystems */ |
| 1486 | mutex_lock(&cgroup_mutex); | 1507 | mutex_lock(&cgroup_mutex); |
| @@ -1514,7 +1535,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1514 | /* We used the new root structure, so this is a new hierarchy */ | 1535 | /* We used the new root structure, so this is a new hierarchy */ |
| 1515 | struct list_head tmp_cg_links; | 1536 | struct list_head tmp_cg_links; |
| 1516 | struct cgroup *root_cgrp = &root->top_cgroup; | 1537 | struct cgroup *root_cgrp = &root->top_cgroup; |
| 1517 | struct inode *inode; | ||
| 1518 | struct cgroupfs_root *existing_root; | 1538 | struct cgroupfs_root *existing_root; |
| 1519 | const struct cred *cred; | 1539 | const struct cred *cred; |
| 1520 | int i; | 1540 | int i; |
| @@ -1528,18 +1548,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1528 | 1548 | ||
| 1529 | mutex_lock(&inode->i_mutex); | 1549 | mutex_lock(&inode->i_mutex); |
| 1530 | mutex_lock(&cgroup_mutex); | 1550 | mutex_lock(&cgroup_mutex); |
| 1551 | mutex_lock(&cgroup_root_mutex); | ||
| 1531 | 1552 | ||
| 1532 | if (strlen(root->name)) { | 1553 | /* Check for name clashes with existing mounts */ |
| 1533 | /* Check for name clashes with existing mounts */ | 1554 | ret = -EBUSY; |
| 1534 | for_each_active_root(existing_root) { | 1555 | if (strlen(root->name)) |
| 1535 | if (!strcmp(existing_root->name, root->name)) { | 1556 | for_each_active_root(existing_root) |
| 1536 | ret = -EBUSY; | 1557 | if (!strcmp(existing_root->name, root->name)) |
| 1537 | mutex_unlock(&cgroup_mutex); | 1558 | goto unlock_drop; |
| 1538 | mutex_unlock(&inode->i_mutex); | ||
| 1539 | goto drop_new_super; | ||
| 1540 | } | ||
| 1541 | } | ||
| 1542 | } | ||
| 1543 | 1559 | ||
| 1544 | /* | 1560 | /* |
| 1545 | * We're accessing css_set_count without locking | 1561 | * We're accessing css_set_count without locking |
| @@ -1549,18 +1565,13 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1549 | * have some link structures left over | 1565 | * have some link structures left over |
| 1550 | */ | 1566 | */ |
| 1551 | ret = allocate_cg_links(css_set_count, &tmp_cg_links); | 1567 | ret = allocate_cg_links(css_set_count, &tmp_cg_links); |
| 1552 | if (ret) { | 1568 | if (ret) |
| 1553 | mutex_unlock(&cgroup_mutex); | 1569 | goto unlock_drop; |
| 1554 | mutex_unlock(&inode->i_mutex); | ||
| 1555 | goto drop_new_super; | ||
| 1556 | } | ||
| 1557 | 1570 | ||
| 1558 | ret = rebind_subsystems(root, root->subsys_bits); | 1571 | ret = rebind_subsystems(root, root->subsys_bits); |
| 1559 | if (ret == -EBUSY) { | 1572 | if (ret == -EBUSY) { |
| 1560 | mutex_unlock(&cgroup_mutex); | ||
| 1561 | mutex_unlock(&inode->i_mutex); | ||
| 1562 | free_cg_links(&tmp_cg_links); | 1573 | free_cg_links(&tmp_cg_links); |
| 1563 | goto drop_new_super; | 1574 | goto unlock_drop; |
| 1564 | } | 1575 | } |
| 1565 | /* | 1576 | /* |
| 1566 | * There must be no failure case after here, since rebinding | 1577 | * There must be no failure case after here, since rebinding |
| @@ -1599,6 +1610,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1599 | cred = override_creds(&init_cred); | 1610 | cred = override_creds(&init_cred); |
| 1600 | cgroup_populate_dir(root_cgrp); | 1611 | cgroup_populate_dir(root_cgrp); |
| 1601 | revert_creds(cred); | 1612 | revert_creds(cred); |
| 1613 | mutex_unlock(&cgroup_root_mutex); | ||
| 1602 | mutex_unlock(&cgroup_mutex); | 1614 | mutex_unlock(&cgroup_mutex); |
| 1603 | mutex_unlock(&inode->i_mutex); | 1615 | mutex_unlock(&inode->i_mutex); |
| 1604 | } else { | 1616 | } else { |
| @@ -1615,6 +1627,10 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1615 | kfree(opts.name); | 1627 | kfree(opts.name); |
| 1616 | return dget(sb->s_root); | 1628 | return dget(sb->s_root); |
| 1617 | 1629 | ||
| 1630 | unlock_drop: | ||
| 1631 | mutex_unlock(&cgroup_root_mutex); | ||
| 1632 | mutex_unlock(&cgroup_mutex); | ||
| 1633 | mutex_unlock(&inode->i_mutex); | ||
| 1618 | drop_new_super: | 1634 | drop_new_super: |
| 1619 | deactivate_locked_super(sb); | 1635 | deactivate_locked_super(sb); |
| 1620 | drop_modules: | 1636 | drop_modules: |
| @@ -1639,6 +1655,7 @@ static void cgroup_kill_sb(struct super_block *sb) { | |||
| 1639 | BUG_ON(!list_empty(&cgrp->sibling)); | 1655 | BUG_ON(!list_empty(&cgrp->sibling)); |
| 1640 | 1656 | ||
| 1641 | mutex_lock(&cgroup_mutex); | 1657 | mutex_lock(&cgroup_mutex); |
| 1658 | mutex_lock(&cgroup_root_mutex); | ||
| 1642 | 1659 | ||
| 1643 | /* Rebind all subsystems back to the default hierarchy */ | 1660 | /* Rebind all subsystems back to the default hierarchy */ |
| 1644 | ret = rebind_subsystems(root, 0); | 1661 | ret = rebind_subsystems(root, 0); |
| @@ -1664,6 +1681,7 @@ static void cgroup_kill_sb(struct super_block *sb) { | |||
| 1664 | root_count--; | 1681 | root_count--; |
| 1665 | } | 1682 | } |
| 1666 | 1683 | ||
| 1684 | mutex_unlock(&cgroup_root_mutex); | ||
| 1667 | mutex_unlock(&cgroup_mutex); | 1685 | mutex_unlock(&cgroup_mutex); |
| 1668 | 1686 | ||
| 1669 | kill_litter_super(sb); | 1687 | kill_litter_super(sb); |
| @@ -1740,11 +1758,90 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) | |||
| 1740 | EXPORT_SYMBOL_GPL(cgroup_path); | 1758 | EXPORT_SYMBOL_GPL(cgroup_path); |
| 1741 | 1759 | ||
| 1742 | /* | 1760 | /* |
| 1761 | * Control Group taskset | ||
| 1762 | */ | ||
| 1763 | struct task_and_cgroup { | ||
| 1764 | struct task_struct *task; | ||
| 1765 | struct cgroup *cgrp; | ||
| 1766 | }; | ||
| 1767 | |||
| 1768 | struct cgroup_taskset { | ||
| 1769 | struct task_and_cgroup single; | ||
| 1770 | struct flex_array *tc_array; | ||
| 1771 | int tc_array_len; | ||
| 1772 | int idx; | ||
| 1773 | struct cgroup *cur_cgrp; | ||
| 1774 | }; | ||
| 1775 | |||
| 1776 | /** | ||
| 1777 | * cgroup_taskset_first - reset taskset and return the first task | ||
| 1778 | * @tset: taskset of interest | ||
| 1779 | * | ||
| 1780 | * @tset iteration is initialized and the first task is returned. | ||
| 1781 | */ | ||
| 1782 | struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset) | ||
| 1783 | { | ||
| 1784 | if (tset->tc_array) { | ||
| 1785 | tset->idx = 0; | ||
| 1786 | return cgroup_taskset_next(tset); | ||
| 1787 | } else { | ||
| 1788 | tset->cur_cgrp = tset->single.cgrp; | ||
| 1789 | return tset->single.task; | ||
| 1790 | } | ||
| 1791 | } | ||
| 1792 | EXPORT_SYMBOL_GPL(cgroup_taskset_first); | ||
| 1793 | |||
| 1794 | /** | ||
| 1795 | * cgroup_taskset_next - iterate to the next task in taskset | ||
| 1796 | * @tset: taskset of interest | ||
| 1797 | * | ||
| 1798 | * Return the next task in @tset. Iteration must have been initialized | ||
| 1799 | * with cgroup_taskset_first(). | ||
| 1800 | */ | ||
| 1801 | struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset) | ||
| 1802 | { | ||
| 1803 | struct task_and_cgroup *tc; | ||
| 1804 | |||
| 1805 | if (!tset->tc_array || tset->idx >= tset->tc_array_len) | ||
| 1806 | return NULL; | ||
| 1807 | |||
| 1808 | tc = flex_array_get(tset->tc_array, tset->idx++); | ||
| 1809 | tset->cur_cgrp = tc->cgrp; | ||
| 1810 | return tc->task; | ||
| 1811 | } | ||
| 1812 | EXPORT_SYMBOL_GPL(cgroup_taskset_next); | ||
| 1813 | |||
| 1814 | /** | ||
| 1815 | * cgroup_taskset_cur_cgroup - return the matching cgroup for the current task | ||
| 1816 | * @tset: taskset of interest | ||
| 1817 | * | ||
| 1818 | * Return the cgroup for the current (last returned) task of @tset. This | ||
| 1819 | * function must be preceded by either cgroup_taskset_first() or | ||
| 1820 | * cgroup_taskset_next(). | ||
| 1821 | */ | ||
| 1822 | struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset) | ||
| 1823 | { | ||
| 1824 | return tset->cur_cgrp; | ||
| 1825 | } | ||
| 1826 | EXPORT_SYMBOL_GPL(cgroup_taskset_cur_cgroup); | ||
| 1827 | |||
| 1828 | /** | ||
| 1829 | * cgroup_taskset_size - return the number of tasks in taskset | ||
| 1830 | * @tset: taskset of interest | ||
| 1831 | */ | ||
| 1832 | int cgroup_taskset_size(struct cgroup_taskset *tset) | ||
| 1833 | { | ||
| 1834 | return tset->tc_array ? tset->tc_array_len : 1; | ||
| 1835 | } | ||
| 1836 | EXPORT_SYMBOL_GPL(cgroup_taskset_size); | ||
| 1837 | |||
| 1838 | |||
| 1839 | /* | ||
| 1743 | * cgroup_task_migrate - move a task from one cgroup to another. | 1840 | * cgroup_task_migrate - move a task from one cgroup to another. |
| 1744 | * | 1841 | * |
| 1745 | * 'guarantee' is set if the caller promises that a new css_set for the task | 1842 | * 'guarantee' is set if the caller promises that a new css_set for the task |
| 1746 | * will already exist. If not set, this function might sleep, and can fail with | 1843 | * will already exist. If not set, this function might sleep, and can fail with |
| 1747 | * -ENOMEM. Otherwise, it can only fail with -ESRCH. | 1844 | * -ENOMEM. Must be called with cgroup_mutex and threadgroup locked. |
| 1748 | */ | 1845 | */ |
| 1749 | static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, | 1846 | static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, |
| 1750 | struct task_struct *tsk, bool guarantee) | 1847 | struct task_struct *tsk, bool guarantee) |
| @@ -1753,14 +1850,12 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, | |||
| 1753 | struct css_set *newcg; | 1850 | struct css_set *newcg; |
| 1754 | 1851 | ||
| 1755 | /* | 1852 | /* |
| 1756 | * get old css_set. we need to take task_lock and refcount it, because | 1853 | * We are synchronized through threadgroup_lock() against PF_EXITING |
| 1757 | * an exiting task can change its css_set to init_css_set and drop its | 1854 | * setting such that we can't race against cgroup_exit() changing the |
| 1758 | * old one without taking cgroup_mutex. | 1855 | * css_set to init_css_set and dropping the old one. |
| 1759 | */ | 1856 | */ |
| 1760 | task_lock(tsk); | 1857 | WARN_ON_ONCE(tsk->flags & PF_EXITING); |
| 1761 | oldcg = tsk->cgroups; | 1858 | oldcg = tsk->cgroups; |
| 1762 | get_css_set(oldcg); | ||
| 1763 | task_unlock(tsk); | ||
| 1764 | 1859 | ||
| 1765 | /* locate or allocate a new css_set for this task. */ | 1860 | /* locate or allocate a new css_set for this task. */ |
| 1766 | if (guarantee) { | 1861 | if (guarantee) { |
| @@ -1775,20 +1870,11 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, | |||
| 1775 | might_sleep(); | 1870 | might_sleep(); |
| 1776 | /* find_css_set will give us newcg already referenced. */ | 1871 | /* find_css_set will give us newcg already referenced. */ |
| 1777 | newcg = find_css_set(oldcg, cgrp); | 1872 | newcg = find_css_set(oldcg, cgrp); |
| 1778 | if (!newcg) { | 1873 | if (!newcg) |
| 1779 | put_css_set(oldcg); | ||
| 1780 | return -ENOMEM; | 1874 | return -ENOMEM; |
| 1781 | } | ||
| 1782 | } | 1875 | } |
| 1783 | put_css_set(oldcg); | ||
| 1784 | 1876 | ||
| 1785 | /* if PF_EXITING is set, the tsk->cgroups pointer is no longer safe. */ | ||
| 1786 | task_lock(tsk); | 1877 | task_lock(tsk); |
| 1787 | if (tsk->flags & PF_EXITING) { | ||
| 1788 | task_unlock(tsk); | ||
| 1789 | put_css_set(newcg); | ||
| 1790 | return -ESRCH; | ||
| 1791 | } | ||
| 1792 | rcu_assign_pointer(tsk->cgroups, newcg); | 1878 | rcu_assign_pointer(tsk->cgroups, newcg); |
| 1793 | task_unlock(tsk); | 1879 | task_unlock(tsk); |
| 1794 | 1880 | ||
| @@ -1814,8 +1900,8 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, | |||
| 1814 | * @cgrp: the cgroup the task is attaching to | 1900 | * @cgrp: the cgroup the task is attaching to |
| 1815 | * @tsk: the task to be attached | 1901 | * @tsk: the task to be attached |
| 1816 | * | 1902 | * |
| 1817 | * Call holding cgroup_mutex. May take task_lock of | 1903 | * Call with cgroup_mutex and threadgroup locked. May take task_lock of |
| 1818 | * the task 'tsk' during call. | 1904 | * @tsk during call. |
| 1819 | */ | 1905 | */ |
| 1820 | int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | 1906 | int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) |
| 1821 | { | 1907 | { |
| @@ -1823,15 +1909,23 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
| 1823 | struct cgroup_subsys *ss, *failed_ss = NULL; | 1909 | struct cgroup_subsys *ss, *failed_ss = NULL; |
| 1824 | struct cgroup *oldcgrp; | 1910 | struct cgroup *oldcgrp; |
| 1825 | struct cgroupfs_root *root = cgrp->root; | 1911 | struct cgroupfs_root *root = cgrp->root; |
| 1912 | struct cgroup_taskset tset = { }; | ||
| 1913 | |||
| 1914 | /* @tsk either already exited or can't exit until the end */ | ||
| 1915 | if (tsk->flags & PF_EXITING) | ||
| 1916 | return -ESRCH; | ||
| 1826 | 1917 | ||
| 1827 | /* Nothing to do if the task is already in that cgroup */ | 1918 | /* Nothing to do if the task is already in that cgroup */ |
| 1828 | oldcgrp = task_cgroup_from_root(tsk, root); | 1919 | oldcgrp = task_cgroup_from_root(tsk, root); |
| 1829 | if (cgrp == oldcgrp) | 1920 | if (cgrp == oldcgrp) |
| 1830 | return 0; | 1921 | return 0; |
| 1831 | 1922 | ||
| 1923 | tset.single.task = tsk; | ||
| 1924 | tset.single.cgrp = oldcgrp; | ||
| 1925 | |||
| 1832 | for_each_subsys(root, ss) { | 1926 | for_each_subsys(root, ss) { |
| 1833 | if (ss->can_attach) { | 1927 | if (ss->can_attach) { |
| 1834 | retval = ss->can_attach(ss, cgrp, tsk); | 1928 | retval = ss->can_attach(ss, cgrp, &tset); |
| 1835 | if (retval) { | 1929 | if (retval) { |
| 1836 | /* | 1930 | /* |
| 1837 | * Remember on which subsystem the can_attach() | 1931 | * Remember on which subsystem the can_attach() |
| @@ -1843,13 +1937,6 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
| 1843 | goto out; | 1937 | goto out; |
| 1844 | } | 1938 | } |
| 1845 | } | 1939 | } |
| 1846 | if (ss->can_attach_task) { | ||
| 1847 | retval = ss->can_attach_task(cgrp, tsk); | ||
| 1848 | if (retval) { | ||
| 1849 | failed_ss = ss; | ||
| 1850 | goto out; | ||
| 1851 | } | ||
| 1852 | } | ||
| 1853 | } | 1940 | } |
| 1854 | 1941 | ||
| 1855 | retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false); | 1942 | retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false); |
| @@ -1857,12 +1944,8 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
| 1857 | goto out; | 1944 | goto out; |
| 1858 | 1945 | ||
| 1859 | for_each_subsys(root, ss) { | 1946 | for_each_subsys(root, ss) { |
| 1860 | if (ss->pre_attach) | ||
| 1861 | ss->pre_attach(cgrp); | ||
| 1862 | if (ss->attach_task) | ||
| 1863 | ss->attach_task(cgrp, tsk); | ||
| 1864 | if (ss->attach) | 1947 | if (ss->attach) |
| 1865 | ss->attach(ss, cgrp, oldcgrp, tsk); | 1948 | ss->attach(ss, cgrp, &tset); |
| 1866 | } | 1949 | } |
| 1867 | 1950 | ||
| 1868 | synchronize_rcu(); | 1951 | synchronize_rcu(); |
| @@ -1884,7 +1967,7 @@ out: | |||
| 1884 | */ | 1967 | */ |
| 1885 | break; | 1968 | break; |
| 1886 | if (ss->cancel_attach) | 1969 | if (ss->cancel_attach) |
| 1887 | ss->cancel_attach(ss, cgrp, tsk); | 1970 | ss->cancel_attach(ss, cgrp, &tset); |
| 1888 | } | 1971 | } |
| 1889 | } | 1972 | } |
| 1890 | return retval; | 1973 | return retval; |
| @@ -1935,23 +2018,17 @@ static bool css_set_check_fetched(struct cgroup *cgrp, | |||
| 1935 | 2018 | ||
| 1936 | read_lock(&css_set_lock); | 2019 | read_lock(&css_set_lock); |
| 1937 | newcg = find_existing_css_set(cg, cgrp, template); | 2020 | newcg = find_existing_css_set(cg, cgrp, template); |
| 1938 | if (newcg) | ||
| 1939 | get_css_set(newcg); | ||
| 1940 | read_unlock(&css_set_lock); | 2021 | read_unlock(&css_set_lock); |
| 1941 | 2022 | ||
| 1942 | /* doesn't exist at all? */ | 2023 | /* doesn't exist at all? */ |
| 1943 | if (!newcg) | 2024 | if (!newcg) |
| 1944 | return false; | 2025 | return false; |
| 1945 | /* see if it's already in the list */ | 2026 | /* see if it's already in the list */ |
| 1946 | list_for_each_entry(cg_entry, newcg_list, links) { | 2027 | list_for_each_entry(cg_entry, newcg_list, links) |
| 1947 | if (cg_entry->cg == newcg) { | 2028 | if (cg_entry->cg == newcg) |
| 1948 | put_css_set(newcg); | ||
| 1949 | return true; | 2029 | return true; |
| 1950 | } | ||
| 1951 | } | ||
| 1952 | 2030 | ||
| 1953 | /* not found */ | 2031 | /* not found */ |
| 1954 | put_css_set(newcg); | ||
| 1955 | return false; | 2032 | return false; |
| 1956 | } | 2033 | } |
| 1957 | 2034 | ||
| @@ -1985,21 +2062,21 @@ static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg, | |||
| 1985 | * @cgrp: the cgroup to attach to | 2062 | * @cgrp: the cgroup to attach to |
| 1986 | * @leader: the threadgroup leader task_struct of the group to be attached | 2063 | * @leader: the threadgroup leader task_struct of the group to be attached |
| 1987 | * | 2064 | * |
| 1988 | * Call holding cgroup_mutex and the threadgroup_fork_lock of the leader. Will | 2065 | * Call holding cgroup_mutex and the group_rwsem of the leader. Will take |
| 1989 | * take task_lock of each thread in leader's threadgroup individually in turn. | 2066 | * task_lock of each thread in leader's threadgroup individually in turn. |
| 1990 | */ | 2067 | */ |
| 1991 | int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | 2068 | static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) |
| 1992 | { | 2069 | { |
| 1993 | int retval, i, group_size; | 2070 | int retval, i, group_size; |
| 1994 | struct cgroup_subsys *ss, *failed_ss = NULL; | 2071 | struct cgroup_subsys *ss, *failed_ss = NULL; |
| 1995 | bool cancel_failed_ss = false; | ||
| 1996 | /* guaranteed to be initialized later, but the compiler needs this */ | 2072 | /* guaranteed to be initialized later, but the compiler needs this */ |
| 1997 | struct cgroup *oldcgrp = NULL; | ||
| 1998 | struct css_set *oldcg; | 2073 | struct css_set *oldcg; |
| 1999 | struct cgroupfs_root *root = cgrp->root; | 2074 | struct cgroupfs_root *root = cgrp->root; |
| 2000 | /* threadgroup list cursor and array */ | 2075 | /* threadgroup list cursor and array */ |
| 2001 | struct task_struct *tsk; | 2076 | struct task_struct *tsk; |
| 2077 | struct task_and_cgroup *tc; | ||
| 2002 | struct flex_array *group; | 2078 | struct flex_array *group; |
| 2079 | struct cgroup_taskset tset = { }; | ||
| 2003 | /* | 2080 | /* |
| 2004 | * we need to make sure we have css_sets for all the tasks we're | 2081 | * we need to make sure we have css_sets for all the tasks we're |
| 2005 | * going to move -before- we actually start moving them, so that in | 2082 | * going to move -before- we actually start moving them, so that in |
| @@ -2012,13 +2089,12 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | |||
| 2012 | * step 0: in order to do expensive, possibly blocking operations for | 2089 | * step 0: in order to do expensive, possibly blocking operations for |
| 2013 | * every thread, we cannot iterate the thread group list, since it needs | 2090 | * every thread, we cannot iterate the thread group list, since it needs |
| 2014 | * rcu or tasklist locked. instead, build an array of all threads in the | 2091 | * rcu or tasklist locked. instead, build an array of all threads in the |
| 2015 | * group - threadgroup_fork_lock prevents new threads from appearing, | 2092 | * group - group_rwsem prevents new threads from appearing, and if |
| 2016 | * and if threads exit, this will just be an over-estimate. | 2093 | * threads exit, this will just be an over-estimate. |
| 2017 | */ | 2094 | */ |
| 2018 | group_size = get_nr_threads(leader); | 2095 | group_size = get_nr_threads(leader); |
| 2019 | /* flex_array supports very large thread-groups better than kmalloc. */ | 2096 | /* flex_array supports very large thread-groups better than kmalloc. */ |
| 2020 | group = flex_array_alloc(sizeof(struct task_struct *), group_size, | 2097 | group = flex_array_alloc(sizeof(*tc), group_size, GFP_KERNEL); |
| 2021 | GFP_KERNEL); | ||
| 2022 | if (!group) | 2098 | if (!group) |
| 2023 | return -ENOMEM; | 2099 | return -ENOMEM; |
| 2024 | /* pre-allocate to guarantee space while iterating in rcu read-side. */ | 2100 | /* pre-allocate to guarantee space while iterating in rcu read-side. */ |
| @@ -2040,49 +2116,53 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | |||
| 2040 | retval = -EAGAIN; | 2116 | retval = -EAGAIN; |
| 2041 | goto out_free_group_list; | 2117 | goto out_free_group_list; |
| 2042 | } | 2118 | } |
| 2043 | /* take a reference on each task in the group to go in the array. */ | 2119 | |
| 2044 | tsk = leader; | 2120 | tsk = leader; |
| 2045 | i = 0; | 2121 | i = 0; |
| 2046 | do { | 2122 | do { |
| 2123 | struct task_and_cgroup ent; | ||
| 2124 | |||
| 2125 | /* @tsk either already exited or can't exit until the end */ | ||
| 2126 | if (tsk->flags & PF_EXITING) | ||
| 2127 | continue; | ||
| 2128 | |||
| 2047 | /* as per above, nr_threads may decrease, but not increase. */ | 2129 | /* as per above, nr_threads may decrease, but not increase. */ |
| 2048 | BUG_ON(i >= group_size); | 2130 | BUG_ON(i >= group_size); |
| 2049 | get_task_struct(tsk); | ||
| 2050 | /* | 2131 | /* |
| 2051 | * saying GFP_ATOMIC has no effect here because we did prealloc | 2132 | * saying GFP_ATOMIC has no effect here because we did prealloc |
| 2052 | * earlier, but it's good form to communicate our expectations. | 2133 | * earlier, but it's good form to communicate our expectations. |
| 2053 | */ | 2134 | */ |
| 2054 | retval = flex_array_put_ptr(group, i, tsk, GFP_ATOMIC); | 2135 | ent.task = tsk; |
| 2136 | ent.cgrp = task_cgroup_from_root(tsk, root); | ||
| 2137 | /* nothing to do if this task is already in the cgroup */ | ||
| 2138 | if (ent.cgrp == cgrp) | ||
| 2139 | continue; | ||
| 2140 | retval = flex_array_put(group, i, &ent, GFP_ATOMIC); | ||
| 2055 | BUG_ON(retval != 0); | 2141 | BUG_ON(retval != 0); |
| 2056 | i++; | 2142 | i++; |
| 2057 | } while_each_thread(leader, tsk); | 2143 | } while_each_thread(leader, tsk); |
| 2058 | /* remember the number of threads in the array for later. */ | 2144 | /* remember the number of threads in the array for later. */ |
| 2059 | group_size = i; | 2145 | group_size = i; |
| 2146 | tset.tc_array = group; | ||
| 2147 | tset.tc_array_len = group_size; | ||
| 2060 | read_unlock(&tasklist_lock); | 2148 | read_unlock(&tasklist_lock); |
| 2061 | 2149 | ||
| 2150 | /* methods shouldn't be called if no task is actually migrating */ | ||
| 2151 | retval = 0; | ||
| 2152 | if (!group_size) | ||
| 2153 | goto out_free_group_list; | ||
| 2154 | |||
| 2062 | /* | 2155 | /* |
| 2063 | * step 1: check that we can legitimately attach to the cgroup. | 2156 | * step 1: check that we can legitimately attach to the cgroup. |
| 2064 | */ | 2157 | */ |
| 2065 | for_each_subsys(root, ss) { | 2158 | for_each_subsys(root, ss) { |
| 2066 | if (ss->can_attach) { | 2159 | if (ss->can_attach) { |
| 2067 | retval = ss->can_attach(ss, cgrp, leader); | 2160 | retval = ss->can_attach(ss, cgrp, &tset); |
| 2068 | if (retval) { | 2161 | if (retval) { |
| 2069 | failed_ss = ss; | 2162 | failed_ss = ss; |
| 2070 | goto out_cancel_attach; | 2163 | goto out_cancel_attach; |
| 2071 | } | 2164 | } |
| 2072 | } | 2165 | } |
| 2073 | /* a callback to be run on every thread in the threadgroup. */ | ||
| 2074 | if (ss->can_attach_task) { | ||
| 2075 | /* run on each task in the threadgroup. */ | ||
| 2076 | for (i = 0; i < group_size; i++) { | ||
| 2077 | tsk = flex_array_get_ptr(group, i); | ||
| 2078 | retval = ss->can_attach_task(cgrp, tsk); | ||
| 2079 | if (retval) { | ||
| 2080 | failed_ss = ss; | ||
| 2081 | cancel_failed_ss = true; | ||
| 2082 | goto out_cancel_attach; | ||
| 2083 | } | ||
| 2084 | } | ||
| 2085 | } | ||
| 2086 | } | 2166 | } |
| 2087 | 2167 | ||
| 2088 | /* | 2168 | /* |
| @@ -2091,67 +2171,36 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | |||
| 2091 | */ | 2171 | */ |
| 2092 | INIT_LIST_HEAD(&newcg_list); | 2172 | INIT_LIST_HEAD(&newcg_list); |
| 2093 | for (i = 0; i < group_size; i++) { | 2173 | for (i = 0; i < group_size; i++) { |
| 2094 | tsk = flex_array_get_ptr(group, i); | 2174 | tc = flex_array_get(group, i); |
| 2095 | /* nothing to do if this task is already in the cgroup */ | 2175 | oldcg = tc->task->cgroups; |
| 2096 | oldcgrp = task_cgroup_from_root(tsk, root); | 2176 | |
| 2097 | if (cgrp == oldcgrp) | 2177 | /* if we don't already have it in the list get a new one */ |
| 2098 | continue; | 2178 | if (!css_set_check_fetched(cgrp, tc->task, oldcg, |
| 2099 | /* get old css_set pointer */ | 2179 | &newcg_list)) { |
| 2100 | task_lock(tsk); | ||
| 2101 | oldcg = tsk->cgroups; | ||
| 2102 | get_css_set(oldcg); | ||
| 2103 | task_unlock(tsk); | ||
| 2104 | /* see if the new one for us is already in the list? */ | ||
| 2105 | if (css_set_check_fetched(cgrp, tsk, oldcg, &newcg_list)) { | ||
| 2106 | /* was already there, nothing to do. */ | ||
| 2107 | put_css_set(oldcg); | ||
| 2108 | } else { | ||
| 2109 | /* we don't already have it. get new one. */ | ||
| 2110 | retval = css_set_prefetch(cgrp, oldcg, &newcg_list); | 2180 | retval = css_set_prefetch(cgrp, oldcg, &newcg_list); |
| 2111 | put_css_set(oldcg); | ||
| 2112 | if (retval) | 2181 | if (retval) |
| 2113 | goto out_list_teardown; | 2182 | goto out_list_teardown; |
| 2114 | } | 2183 | } |
| 2115 | } | 2184 | } |
| 2116 | 2185 | ||
| 2117 | /* | 2186 | /* |
| 2118 | * step 3: now that we're guaranteed success wrt the css_sets, proceed | 2187 | * step 3: now that we're guaranteed success wrt the css_sets, |
| 2119 | * to move all tasks to the new cgroup, calling ss->attach_task for each | 2188 | * proceed to move all tasks to the new cgroup. There are no |
| 2120 | * one along the way. there are no failure cases after here, so this is | 2189 | * failure cases after here, so this is the commit point. |
| 2121 | * the commit point. | ||
| 2122 | */ | 2190 | */ |
| 2123 | for_each_subsys(root, ss) { | ||
| 2124 | if (ss->pre_attach) | ||
| 2125 | ss->pre_attach(cgrp); | ||
| 2126 | } | ||
| 2127 | for (i = 0; i < group_size; i++) { | 2191 | for (i = 0; i < group_size; i++) { |
| 2128 | tsk = flex_array_get_ptr(group, i); | 2192 | tc = flex_array_get(group, i); |
| 2129 | /* leave current thread as it is if it's already there */ | 2193 | retval = cgroup_task_migrate(cgrp, tc->cgrp, tc->task, true); |
| 2130 | oldcgrp = task_cgroup_from_root(tsk, root); | 2194 | BUG_ON(retval); |
| 2131 | if (cgrp == oldcgrp) | ||
| 2132 | continue; | ||
| 2133 | /* if the thread is PF_EXITING, it can just get skipped. */ | ||
| 2134 | retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true); | ||
| 2135 | if (retval == 0) { | ||
| 2136 | /* attach each task to each subsystem */ | ||
| 2137 | for_each_subsys(root, ss) { | ||
| 2138 | if (ss->attach_task) | ||
| 2139 | ss->attach_task(cgrp, tsk); | ||
| 2140 | } | ||
| 2141 | } else { | ||
| 2142 | BUG_ON(retval != -ESRCH); | ||
| 2143 | } | ||
| 2144 | } | 2195 | } |
| 2145 | /* nothing is sensitive to fork() after this point. */ | 2196 | /* nothing is sensitive to fork() after this point. */ |
| 2146 | 2197 | ||
| 2147 | /* | 2198 | /* |
| 2148 | * step 4: do expensive, non-thread-specific subsystem callbacks. | 2199 | * step 4: do subsystem attach callbacks. |
| 2149 | * TODO: if ever a subsystem needs to know the oldcgrp for each task | ||
| 2150 | * being moved, this call will need to be reworked to communicate that. | ||
| 2151 | */ | 2200 | */ |
| 2152 | for_each_subsys(root, ss) { | 2201 | for_each_subsys(root, ss) { |
| 2153 | if (ss->attach) | 2202 | if (ss->attach) |
| 2154 | ss->attach(ss, cgrp, oldcgrp, leader); | 2203 | ss->attach(ss, cgrp, &tset); |
| 2155 | } | 2204 | } |
| 2156 | 2205 | ||
| 2157 | /* | 2206 | /* |
| @@ -2171,20 +2220,12 @@ out_cancel_attach: | |||
| 2171 | /* same deal as in cgroup_attach_task */ | 2220 | /* same deal as in cgroup_attach_task */ |
| 2172 | if (retval) { | 2221 | if (retval) { |
| 2173 | for_each_subsys(root, ss) { | 2222 | for_each_subsys(root, ss) { |
| 2174 | if (ss == failed_ss) { | 2223 | if (ss == failed_ss) |
| 2175 | if (cancel_failed_ss && ss->cancel_attach) | ||
| 2176 | ss->cancel_attach(ss, cgrp, leader); | ||
| 2177 | break; | 2224 | break; |
| 2178 | } | ||
| 2179 | if (ss->cancel_attach) | 2225 | if (ss->cancel_attach) |
| 2180 | ss->cancel_attach(ss, cgrp, leader); | 2226 | ss->cancel_attach(ss, cgrp, &tset); |
| 2181 | } | 2227 | } |
| 2182 | } | 2228 | } |
| 2183 | /* clean up the array of referenced threads in the group. */ | ||
| 2184 | for (i = 0; i < group_size; i++) { | ||
| 2185 | tsk = flex_array_get_ptr(group, i); | ||
| 2186 | put_task_struct(tsk); | ||
| 2187 | } | ||
| 2188 | out_free_group_list: | 2229 | out_free_group_list: |
| 2189 | flex_array_free(group); | 2230 | flex_array_free(group); |
| 2190 | return retval; | 2231 | return retval; |
| @@ -2192,8 +2233,8 @@ out_free_group_list: | |||
| 2192 | 2233 | ||
| 2193 | /* | 2234 | /* |
| 2194 | * Find the task_struct of the task to attach by vpid and pass it along to the | 2235 | * Find the task_struct of the task to attach by vpid and pass it along to the |
| 2195 | * function to attach either it or all tasks in its threadgroup. Will take | 2236 | * function to attach either it or all tasks in its threadgroup. Will lock |
| 2196 | * cgroup_mutex; may take task_lock of task. | 2237 | * cgroup_mutex and threadgroup; may take task_lock of task. |
| 2197 | */ | 2238 | */ |
| 2198 | static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup) | 2239 | static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup) |
| 2199 | { | 2240 | { |
| @@ -2220,13 +2261,7 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup) | |||
| 2220 | * detect it later. | 2261 | * detect it later. |
| 2221 | */ | 2262 | */ |
| 2222 | tsk = tsk->group_leader; | 2263 | tsk = tsk->group_leader; |
| 2223 | } else if (tsk->flags & PF_EXITING) { | ||
| 2224 | /* optimization for the single-task-only case */ | ||
| 2225 | rcu_read_unlock(); | ||
| 2226 | cgroup_unlock(); | ||
| 2227 | return -ESRCH; | ||
| 2228 | } | 2264 | } |
| 2229 | |||
| 2230 | /* | 2265 | /* |
| 2231 | * even if we're attaching all tasks in the thread group, we | 2266 | * even if we're attaching all tasks in the thread group, we |
| 2232 | * only need to check permissions on one of them. | 2267 | * only need to check permissions on one of them. |
| @@ -2249,13 +2284,15 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup) | |||
| 2249 | get_task_struct(tsk); | 2284 | get_task_struct(tsk); |
| 2250 | } | 2285 | } |
| 2251 | 2286 | ||
| 2252 | if (threadgroup) { | 2287 | threadgroup_lock(tsk); |
| 2253 | threadgroup_fork_write_lock(tsk); | 2288 | |
| 2289 | if (threadgroup) | ||
| 2254 | ret = cgroup_attach_proc(cgrp, tsk); | 2290 | ret = cgroup_attach_proc(cgrp, tsk); |
| 2255 | threadgroup_fork_write_unlock(tsk); | 2291 | else |
| 2256 | } else { | ||
| 2257 | ret = cgroup_attach_task(cgrp, tsk); | 2292 | ret = cgroup_attach_task(cgrp, tsk); |
| 2258 | } | 2293 | |
| 2294 | threadgroup_unlock(tsk); | ||
| 2295 | |||
| 2259 | put_task_struct(tsk); | 2296 | put_task_struct(tsk); |
| 2260 | cgroup_unlock(); | 2297 | cgroup_unlock(); |
| 2261 | return ret; | 2298 | return ret; |
| @@ -2306,7 +2343,9 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft, | |||
| 2306 | return -EINVAL; | 2343 | return -EINVAL; |
| 2307 | if (!cgroup_lock_live_group(cgrp)) | 2344 | if (!cgroup_lock_live_group(cgrp)) |
| 2308 | return -ENODEV; | 2345 | return -ENODEV; |
| 2346 | mutex_lock(&cgroup_root_mutex); | ||
| 2309 | strcpy(cgrp->root->release_agent_path, buffer); | 2347 | strcpy(cgrp->root->release_agent_path, buffer); |
| 2348 | mutex_unlock(&cgroup_root_mutex); | ||
| 2310 | cgroup_unlock(); | 2349 | cgroup_unlock(); |
| 2311 | return 0; | 2350 | return 0; |
| 2312 | } | 2351 | } |
| @@ -2585,7 +2624,7 @@ static inline struct cftype *__file_cft(struct file *file) | |||
| 2585 | return __d_cft(file->f_dentry); | 2624 | return __d_cft(file->f_dentry); |
| 2586 | } | 2625 | } |
| 2587 | 2626 | ||
| 2588 | static int cgroup_create_file(struct dentry *dentry, mode_t mode, | 2627 | static int cgroup_create_file(struct dentry *dentry, umode_t mode, |
| 2589 | struct super_block *sb) | 2628 | struct super_block *sb) |
| 2590 | { | 2629 | { |
| 2591 | struct inode *inode; | 2630 | struct inode *inode; |
| @@ -2626,7 +2665,7 @@ static int cgroup_create_file(struct dentry *dentry, mode_t mode, | |||
| 2626 | * @mode: mode to set on new directory. | 2665 | * @mode: mode to set on new directory. |
| 2627 | */ | 2666 | */ |
| 2628 | static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry, | 2667 | static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry, |
| 2629 | mode_t mode) | 2668 | umode_t mode) |
| 2630 | { | 2669 | { |
| 2631 | struct dentry *parent; | 2670 | struct dentry *parent; |
| 2632 | int error = 0; | 2671 | int error = 0; |
| @@ -2653,9 +2692,9 @@ static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry, | |||
| 2653 | * returns S_IRUGO if it has only a read handler | 2692 | * returns S_IRUGO if it has only a read handler |
| 2654 | * returns S_IWUSR if it has only a write handler | 2693 | * returns S_IWUSR if it has only a write handler |
| 2655 | */ | 2694 | */ |
| 2656 | static mode_t cgroup_file_mode(const struct cftype *cft) | 2695 | static umode_t cgroup_file_mode(const struct cftype *cft) |
| 2657 | { | 2696 | { |
| 2658 | mode_t mode = 0; | 2697 | umode_t mode = 0; |
| 2659 | 2698 | ||
| 2660 | if (cft->mode) | 2699 | if (cft->mode) |
| 2661 | return cft->mode; | 2700 | return cft->mode; |
| @@ -2678,7 +2717,7 @@ int cgroup_add_file(struct cgroup *cgrp, | |||
| 2678 | struct dentry *dir = cgrp->dentry; | 2717 | struct dentry *dir = cgrp->dentry; |
| 2679 | struct dentry *dentry; | 2718 | struct dentry *dentry; |
| 2680 | int error; | 2719 | int error; |
| 2681 | mode_t mode; | 2720 | umode_t mode; |
| 2682 | 2721 | ||
| 2683 | char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 }; | 2722 | char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 }; |
| 2684 | if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) { | 2723 | if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) { |
| @@ -2789,6 +2828,7 @@ static void cgroup_enable_task_cg_lists(void) | |||
| 2789 | } | 2828 | } |
| 2790 | 2829 | ||
| 2791 | void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it) | 2830 | void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it) |
| 2831 | __acquires(css_set_lock) | ||
| 2792 | { | 2832 | { |
| 2793 | /* | 2833 | /* |
| 2794 | * The first time anyone tries to iterate across a cgroup, | 2834 | * The first time anyone tries to iterate across a cgroup, |
| @@ -2828,6 +2868,7 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp, | |||
| 2828 | } | 2868 | } |
| 2829 | 2869 | ||
| 2830 | void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it) | 2870 | void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it) |
| 2871 | __releases(css_set_lock) | ||
| 2831 | { | 2872 | { |
| 2832 | read_unlock(&css_set_lock); | 2873 | read_unlock(&css_set_lock); |
| 2833 | } | 2874 | } |
| @@ -3752,7 +3793,7 @@ static void cgroup_unlock_hierarchy(struct cgroupfs_root *root) | |||
| 3752 | * Must be called with the mutex on the parent inode held | 3793 | * Must be called with the mutex on the parent inode held |
| 3753 | */ | 3794 | */ |
| 3754 | static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | 3795 | static long cgroup_create(struct cgroup *parent, struct dentry *dentry, |
| 3755 | mode_t mode) | 3796 | umode_t mode) |
| 3756 | { | 3797 | { |
| 3757 | struct cgroup *cgrp; | 3798 | struct cgroup *cgrp; |
| 3758 | struct cgroupfs_root *root = parent->root; | 3799 | struct cgroupfs_root *root = parent->root; |
| @@ -3846,7 +3887,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
| 3846 | return err; | 3887 | return err; |
| 3847 | } | 3888 | } |
| 3848 | 3889 | ||
| 3849 | static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode) | 3890 | static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) |
| 3850 | { | 3891 | { |
| 3851 | struct cgroup *c_parent = dentry->d_parent->d_fsdata; | 3892 | struct cgroup *c_parent = dentry->d_parent->d_fsdata; |
| 3852 | 3893 | ||
| @@ -4491,20 +4532,31 @@ static const struct file_operations proc_cgroupstats_operations = { | |||
| 4491 | * | 4532 | * |
| 4492 | * A pointer to the shared css_set was automatically copied in | 4533 | * A pointer to the shared css_set was automatically copied in |
| 4493 | * fork.c by dup_task_struct(). However, we ignore that copy, since | 4534 | * fork.c by dup_task_struct(). However, we ignore that copy, since |
| 4494 | * it was not made under the protection of RCU or cgroup_mutex, so | 4535 | * it was not made under the protection of RCU, cgroup_mutex or |
| 4495 | * might no longer be a valid cgroup pointer. cgroup_attach_task() might | 4536 | * threadgroup_change_begin(), so it might no longer be a valid |
| 4496 | * have already changed current->cgroups, allowing the previously | 4537 | * cgroup pointer. cgroup_attach_task() might have already changed |
| 4497 | * referenced cgroup group to be removed and freed. | 4538 | * current->cgroups, allowing the previously referenced cgroup |
| 4539 | * group to be removed and freed. | ||
| 4540 | * | ||
| 4541 | * Outside the pointer validity we also need to process the css_set | ||
| 4542 | * inheritance between threadgroup_change_begin() and | ||
| 4543 | * threadgroup_change_end(), this way there is no leak in any process | ||
| 4544 | * wide migration performed by cgroup_attach_proc() that could otherwise | ||
| 4545 | * miss a thread because it is too early or too late in the fork stage. | ||
| 4498 | * | 4546 | * |
| 4499 | * At the point that cgroup_fork() is called, 'current' is the parent | 4547 | * At the point that cgroup_fork() is called, 'current' is the parent |
| 4500 | * task, and the passed argument 'child' points to the child task. | 4548 | * task, and the passed argument 'child' points to the child task. |
| 4501 | */ | 4549 | */ |
| 4502 | void cgroup_fork(struct task_struct *child) | 4550 | void cgroup_fork(struct task_struct *child) |
| 4503 | { | 4551 | { |
| 4504 | task_lock(current); | 4552 | /* |
| 4553 | * We don't need to task_lock() current because current->cgroups | ||
| 4554 | * can't be changed concurrently here. The parent obviously hasn't | ||
| 4555 | * exited and called cgroup_exit(), and we are synchronized against | ||
| 4556 | * cgroup migration through threadgroup_change_begin(). | ||
| 4557 | */ | ||
| 4505 | child->cgroups = current->cgroups; | 4558 | child->cgroups = current->cgroups; |
| 4506 | get_css_set(child->cgroups); | 4559 | get_css_set(child->cgroups); |
| 4507 | task_unlock(current); | ||
| 4508 | INIT_LIST_HEAD(&child->cg_list); | 4560 | INIT_LIST_HEAD(&child->cg_list); |
| 4509 | } | 4561 | } |
| 4510 | 4562 | ||
| @@ -4546,10 +4598,19 @@ void cgroup_post_fork(struct task_struct *child) | |||
| 4546 | { | 4598 | { |
| 4547 | if (use_task_css_set_links) { | 4599 | if (use_task_css_set_links) { |
| 4548 | write_lock(&css_set_lock); | 4600 | write_lock(&css_set_lock); |
| 4549 | task_lock(child); | 4601 | if (list_empty(&child->cg_list)) { |
| 4550 | if (list_empty(&child->cg_list)) | 4602 | /* |
| 4603 | * It's safe to use child->cgroups without task_lock() | ||
| 4604 | * here because we are protected through | ||
| 4605 | * threadgroup_change_begin() against concurrent | ||
| 4606 | * css_set change in cgroup_task_migrate(). Also | ||
| 4607 | * the task can't exit at that point until | ||
| 4608 | * wake_up_new_task() is called, so we are protected | ||
| 4609 | * against cgroup_exit() setting child->cgroups to | ||
| 4610 | * init_css_set. | ||
| 4611 | */ | ||
| 4551 | list_add(&child->cg_list, &child->cgroups->tasks); | 4612 | list_add(&child->cg_list, &child->cgroups->tasks); |
| 4552 | task_unlock(child); | 4613 | } |
| 4553 | write_unlock(&css_set_lock); | 4614 | write_unlock(&css_set_lock); |
| 4554 | } | 4615 | } |
| 4555 | } | 4616 | } |
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 213c0351dad8..fc0646b78a64 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c | |||
| @@ -48,19 +48,17 @@ static inline struct freezer *task_freezer(struct task_struct *task) | |||
| 48 | struct freezer, css); | 48 | struct freezer, css); |
| 49 | } | 49 | } |
| 50 | 50 | ||
| 51 | static inline int __cgroup_freezing_or_frozen(struct task_struct *task) | 51 | bool cgroup_freezing(struct task_struct *task) |
| 52 | { | 52 | { |
| 53 | enum freezer_state state = task_freezer(task)->state; | 53 | enum freezer_state state; |
| 54 | return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN); | 54 | bool ret; |
| 55 | } | ||
| 56 | 55 | ||
| 57 | int cgroup_freezing_or_frozen(struct task_struct *task) | 56 | rcu_read_lock(); |
| 58 | { | 57 | state = task_freezer(task)->state; |
| 59 | int result; | 58 | ret = state == CGROUP_FREEZING || state == CGROUP_FROZEN; |
| 60 | task_lock(task); | 59 | rcu_read_unlock(); |
| 61 | result = __cgroup_freezing_or_frozen(task); | 60 | |
| 62 | task_unlock(task); | 61 | return ret; |
| 63 | return result; | ||
| 64 | } | 62 | } |
| 65 | 63 | ||
| 66 | /* | 64 | /* |
| @@ -102,9 +100,6 @@ struct cgroup_subsys freezer_subsys; | |||
| 102 | * freezer_can_attach(): | 100 | * freezer_can_attach(): |
| 103 | * cgroup_mutex (held by caller of can_attach) | 101 | * cgroup_mutex (held by caller of can_attach) |
| 104 | * | 102 | * |
| 105 | * cgroup_freezing_or_frozen(): | ||
| 106 | * task->alloc_lock (to get task's cgroup) | ||
| 107 | * | ||
| 108 | * freezer_fork() (preserving fork() performance means can't take cgroup_mutex): | 103 | * freezer_fork() (preserving fork() performance means can't take cgroup_mutex): |
| 109 | * freezer->lock | 104 | * freezer->lock |
| 110 | * sighand->siglock (if the cgroup is freezing) | 105 | * sighand->siglock (if the cgroup is freezing) |
| @@ -130,7 +125,7 @@ struct cgroup_subsys freezer_subsys; | |||
| 130 | * write_lock css_set_lock (cgroup iterator start) | 125 | * write_lock css_set_lock (cgroup iterator start) |
| 131 | * task->alloc_lock | 126 | * task->alloc_lock |
| 132 | * read_lock css_set_lock (cgroup iterator start) | 127 | * read_lock css_set_lock (cgroup iterator start) |
| 133 | * task->alloc_lock (inside thaw_process(), prevents race with refrigerator()) | 128 | * task->alloc_lock (inside __thaw_task(), prevents race with refrigerator()) |
| 134 | * sighand->siglock | 129 | * sighand->siglock |
| 135 | */ | 130 | */ |
| 136 | static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss, | 131 | static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss, |
| @@ -150,7 +145,11 @@ static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss, | |||
| 150 | static void freezer_destroy(struct cgroup_subsys *ss, | 145 | static void freezer_destroy(struct cgroup_subsys *ss, |
| 151 | struct cgroup *cgroup) | 146 | struct cgroup *cgroup) |
| 152 | { | 147 | { |
| 153 | kfree(cgroup_freezer(cgroup)); | 148 | struct freezer *freezer = cgroup_freezer(cgroup); |
| 149 | |||
| 150 | if (freezer->state != CGROUP_THAWED) | ||
| 151 | atomic_dec(&system_freezing_cnt); | ||
| 152 | kfree(freezer); | ||
| 154 | } | 153 | } |
| 155 | 154 | ||
| 156 | /* task is frozen or will freeze immediately when next it gets woken */ | 155 | /* task is frozen or will freeze immediately when next it gets woken */ |
| @@ -167,13 +166,17 @@ static bool is_task_frozen_enough(struct task_struct *task) | |||
| 167 | */ | 166 | */ |
| 168 | static int freezer_can_attach(struct cgroup_subsys *ss, | 167 | static int freezer_can_attach(struct cgroup_subsys *ss, |
| 169 | struct cgroup *new_cgroup, | 168 | struct cgroup *new_cgroup, |
| 170 | struct task_struct *task) | 169 | struct cgroup_taskset *tset) |
| 171 | { | 170 | { |
| 172 | struct freezer *freezer; | 171 | struct freezer *freezer; |
| 172 | struct task_struct *task; | ||
| 173 | 173 | ||
| 174 | /* | 174 | /* |
| 175 | * Anything frozen can't move or be moved to/from. | 175 | * Anything frozen can't move or be moved to/from. |
| 176 | */ | 176 | */ |
| 177 | cgroup_taskset_for_each(task, new_cgroup, tset) | ||
| 178 | if (cgroup_freezing(task)) | ||
| 179 | return -EBUSY; | ||
| 177 | 180 | ||
| 178 | freezer = cgroup_freezer(new_cgroup); | 181 | freezer = cgroup_freezer(new_cgroup); |
| 179 | if (freezer->state != CGROUP_THAWED) | 182 | if (freezer->state != CGROUP_THAWED) |
| @@ -182,17 +185,6 @@ static int freezer_can_attach(struct cgroup_subsys *ss, | |||
| 182 | return 0; | 185 | return 0; |
| 183 | } | 186 | } |
| 184 | 187 | ||
| 185 | static int freezer_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | ||
| 186 | { | ||
| 187 | rcu_read_lock(); | ||
| 188 | if (__cgroup_freezing_or_frozen(tsk)) { | ||
| 189 | rcu_read_unlock(); | ||
| 190 | return -EBUSY; | ||
| 191 | } | ||
| 192 | rcu_read_unlock(); | ||
| 193 | return 0; | ||
| 194 | } | ||
| 195 | |||
| 196 | static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) | 188 | static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) |
| 197 | { | 189 | { |
| 198 | struct freezer *freezer; | 190 | struct freezer *freezer; |
| @@ -220,7 +212,7 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) | |||
| 220 | 212 | ||
| 221 | /* Locking avoids race with FREEZING -> THAWED transitions. */ | 213 | /* Locking avoids race with FREEZING -> THAWED transitions. */ |
| 222 | if (freezer->state == CGROUP_FREEZING) | 214 | if (freezer->state == CGROUP_FREEZING) |
| 223 | freeze_task(task, true); | 215 | freeze_task(task); |
| 224 | spin_unlock_irq(&freezer->lock); | 216 | spin_unlock_irq(&freezer->lock); |
| 225 | } | 217 | } |
| 226 | 218 | ||
| @@ -238,7 +230,7 @@ static void update_if_frozen(struct cgroup *cgroup, | |||
| 238 | cgroup_iter_start(cgroup, &it); | 230 | cgroup_iter_start(cgroup, &it); |
| 239 | while ((task = cgroup_iter_next(cgroup, &it))) { | 231 | while ((task = cgroup_iter_next(cgroup, &it))) { |
| 240 | ntotal++; | 232 | ntotal++; |
| 241 | if (is_task_frozen_enough(task)) | 233 | if (freezing(task) && is_task_frozen_enough(task)) |
| 242 | nfrozen++; | 234 | nfrozen++; |
| 243 | } | 235 | } |
| 244 | 236 | ||
| @@ -286,10 +278,9 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) | |||
| 286 | struct task_struct *task; | 278 | struct task_struct *task; |
| 287 | unsigned int num_cant_freeze_now = 0; | 279 | unsigned int num_cant_freeze_now = 0; |
| 288 | 280 | ||
| 289 | freezer->state = CGROUP_FREEZING; | ||
| 290 | cgroup_iter_start(cgroup, &it); | 281 | cgroup_iter_start(cgroup, &it); |
| 291 | while ((task = cgroup_iter_next(cgroup, &it))) { | 282 | while ((task = cgroup_iter_next(cgroup, &it))) { |
| 292 | if (!freeze_task(task, true)) | 283 | if (!freeze_task(task)) |
| 293 | continue; | 284 | continue; |
| 294 | if (is_task_frozen_enough(task)) | 285 | if (is_task_frozen_enough(task)) |
| 295 | continue; | 286 | continue; |
| @@ -307,12 +298,9 @@ static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) | |||
| 307 | struct task_struct *task; | 298 | struct task_struct *task; |
| 308 | 299 | ||
| 309 | cgroup_iter_start(cgroup, &it); | 300 | cgroup_iter_start(cgroup, &it); |
| 310 | while ((task = cgroup_iter_next(cgroup, &it))) { | 301 | while ((task = cgroup_iter_next(cgroup, &it))) |
| 311 | thaw_process(task); | 302 | __thaw_task(task); |
| 312 | } | ||
| 313 | cgroup_iter_end(cgroup, &it); | 303 | cgroup_iter_end(cgroup, &it); |
| 314 | |||
| 315 | freezer->state = CGROUP_THAWED; | ||
| 316 | } | 304 | } |
| 317 | 305 | ||
| 318 | static int freezer_change_state(struct cgroup *cgroup, | 306 | static int freezer_change_state(struct cgroup *cgroup, |
| @@ -326,20 +314,24 @@ static int freezer_change_state(struct cgroup *cgroup, | |||
| 326 | spin_lock_irq(&freezer->lock); | 314 | spin_lock_irq(&freezer->lock); |
| 327 | 315 | ||
| 328 | update_if_frozen(cgroup, freezer); | 316 | update_if_frozen(cgroup, freezer); |
| 329 | if (goal_state == freezer->state) | ||
| 330 | goto out; | ||
| 331 | 317 | ||
| 332 | switch (goal_state) { | 318 | switch (goal_state) { |
| 333 | case CGROUP_THAWED: | 319 | case CGROUP_THAWED: |
| 320 | if (freezer->state != CGROUP_THAWED) | ||
| 321 | atomic_dec(&system_freezing_cnt); | ||
| 322 | freezer->state = CGROUP_THAWED; | ||
| 334 | unfreeze_cgroup(cgroup, freezer); | 323 | unfreeze_cgroup(cgroup, freezer); |
| 335 | break; | 324 | break; |
| 336 | case CGROUP_FROZEN: | 325 | case CGROUP_FROZEN: |
| 326 | if (freezer->state == CGROUP_THAWED) | ||
| 327 | atomic_inc(&system_freezing_cnt); | ||
| 328 | freezer->state = CGROUP_FREEZING; | ||
| 337 | retval = try_to_freeze_cgroup(cgroup, freezer); | 329 | retval = try_to_freeze_cgroup(cgroup, freezer); |
| 338 | break; | 330 | break; |
| 339 | default: | 331 | default: |
| 340 | BUG(); | 332 | BUG(); |
| 341 | } | 333 | } |
| 342 | out: | 334 | |
| 343 | spin_unlock_irq(&freezer->lock); | 335 | spin_unlock_irq(&freezer->lock); |
| 344 | 336 | ||
| 345 | return retval; | 337 | return retval; |
| @@ -388,10 +380,5 @@ struct cgroup_subsys freezer_subsys = { | |||
| 388 | .populate = freezer_populate, | 380 | .populate = freezer_populate, |
| 389 | .subsys_id = freezer_subsys_id, | 381 | .subsys_id = freezer_subsys_id, |
| 390 | .can_attach = freezer_can_attach, | 382 | .can_attach = freezer_can_attach, |
| 391 | .can_attach_task = freezer_can_attach_task, | ||
| 392 | .pre_attach = NULL, | ||
| 393 | .attach_task = NULL, | ||
| 394 | .attach = NULL, | ||
| 395 | .fork = freezer_fork, | 383 | .fork = freezer_fork, |
| 396 | .exit = NULL, | ||
| 397 | }; | 384 | }; |
diff --git a/kernel/cpu.c b/kernel/cpu.c index 5ca38d5d238a..2060c6e57027 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
| @@ -470,7 +470,7 @@ out: | |||
| 470 | cpu_maps_update_done(); | 470 | cpu_maps_update_done(); |
| 471 | } | 471 | } |
| 472 | 472 | ||
| 473 | static int alloc_frozen_cpus(void) | 473 | static int __init alloc_frozen_cpus(void) |
| 474 | { | 474 | { |
| 475 | if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO)) | 475 | if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO)) |
| 476 | return -ENOMEM; | 476 | return -ENOMEM; |
| @@ -543,7 +543,7 @@ cpu_hotplug_pm_callback(struct notifier_block *nb, | |||
| 543 | } | 543 | } |
| 544 | 544 | ||
| 545 | 545 | ||
| 546 | int cpu_hotplug_pm_sync_init(void) | 546 | static int __init cpu_hotplug_pm_sync_init(void) |
| 547 | { | 547 | { |
| 548 | pm_notifier(cpu_hotplug_pm_callback, 0); | 548 | pm_notifier(cpu_hotplug_pm_callback, 0); |
| 549 | return 0; | 549 | return 0; |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 0b1712dba587..a09ac2b9a661 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
| @@ -1389,79 +1389,73 @@ static int fmeter_getrate(struct fmeter *fmp) | |||
| 1389 | return val; | 1389 | return val; |
| 1390 | } | 1390 | } |
| 1391 | 1391 | ||
| 1392 | /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ | ||
| 1393 | static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont, | ||
| 1394 | struct task_struct *tsk) | ||
| 1395 | { | ||
| 1396 | struct cpuset *cs = cgroup_cs(cont); | ||
| 1397 | |||
| 1398 | if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) | ||
| 1399 | return -ENOSPC; | ||
| 1400 | |||
| 1401 | /* | ||
| 1402 | * Kthreads bound to specific cpus cannot be moved to a new cpuset; we | ||
| 1403 | * cannot change their cpu affinity and isolating such threads by their | ||
| 1404 | * set of allowed nodes is unnecessary. Thus, cpusets are not | ||
| 1405 | * applicable for such threads. This prevents checking for success of | ||
| 1406 | * set_cpus_allowed_ptr() on all attached tasks before cpus_allowed may | ||
| 1407 | * be changed. | ||
| 1408 | */ | ||
| 1409 | if (tsk->flags & PF_THREAD_BOUND) | ||
| 1410 | return -EINVAL; | ||
| 1411 | |||
| 1412 | return 0; | ||
| 1413 | } | ||
| 1414 | |||
| 1415 | static int cpuset_can_attach_task(struct cgroup *cgrp, struct task_struct *task) | ||
| 1416 | { | ||
| 1417 | return security_task_setscheduler(task); | ||
| 1418 | } | ||
| 1419 | |||
| 1420 | /* | 1392 | /* |
| 1421 | * Protected by cgroup_lock. The nodemasks must be stored globally because | 1393 | * Protected by cgroup_lock. The nodemasks must be stored globally because |
| 1422 | * dynamically allocating them is not allowed in pre_attach, and they must | 1394 | * dynamically allocating them is not allowed in can_attach, and they must |
| 1423 | * persist among pre_attach, attach_task, and attach. | 1395 | * persist until attach. |
| 1424 | */ | 1396 | */ |
| 1425 | static cpumask_var_t cpus_attach; | 1397 | static cpumask_var_t cpus_attach; |
| 1426 | static nodemask_t cpuset_attach_nodemask_from; | 1398 | static nodemask_t cpuset_attach_nodemask_from; |
| 1427 | static nodemask_t cpuset_attach_nodemask_to; | 1399 | static nodemask_t cpuset_attach_nodemask_to; |
| 1428 | 1400 | ||
| 1429 | /* Set-up work for before attaching each task. */ | 1401 | /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ |
| 1430 | static void cpuset_pre_attach(struct cgroup *cont) | 1402 | static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, |
| 1403 | struct cgroup_taskset *tset) | ||
| 1431 | { | 1404 | { |
| 1432 | struct cpuset *cs = cgroup_cs(cont); | 1405 | struct cpuset *cs = cgroup_cs(cgrp); |
| 1406 | struct task_struct *task; | ||
| 1407 | int ret; | ||
| 1408 | |||
| 1409 | if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) | ||
| 1410 | return -ENOSPC; | ||
| 1411 | |||
| 1412 | cgroup_taskset_for_each(task, cgrp, tset) { | ||
| 1413 | /* | ||
| 1414 | * Kthreads bound to specific cpus cannot be moved to a new | ||
| 1415 | * cpuset; we cannot change their cpu affinity and | ||
| 1416 | * isolating such threads by their set of allowed nodes is | ||
| 1417 | * unnecessary. Thus, cpusets are not applicable for such | ||
| 1418 | * threads. This prevents checking for success of | ||
| 1419 | * set_cpus_allowed_ptr() on all attached tasks before | ||
| 1420 | * cpus_allowed may be changed. | ||
| 1421 | */ | ||
| 1422 | if (task->flags & PF_THREAD_BOUND) | ||
| 1423 | return -EINVAL; | ||
| 1424 | if ((ret = security_task_setscheduler(task))) | ||
| 1425 | return ret; | ||
| 1426 | } | ||
| 1433 | 1427 | ||
| 1428 | /* prepare for attach */ | ||
| 1434 | if (cs == &top_cpuset) | 1429 | if (cs == &top_cpuset) |
| 1435 | cpumask_copy(cpus_attach, cpu_possible_mask); | 1430 | cpumask_copy(cpus_attach, cpu_possible_mask); |
| 1436 | else | 1431 | else |
| 1437 | guarantee_online_cpus(cs, cpus_attach); | 1432 | guarantee_online_cpus(cs, cpus_attach); |
| 1438 | 1433 | ||
| 1439 | guarantee_online_mems(cs, &cpuset_attach_nodemask_to); | 1434 | guarantee_online_mems(cs, &cpuset_attach_nodemask_to); |
| 1440 | } | ||
| 1441 | |||
| 1442 | /* Per-thread attachment work. */ | ||
| 1443 | static void cpuset_attach_task(struct cgroup *cont, struct task_struct *tsk) | ||
| 1444 | { | ||
| 1445 | int err; | ||
| 1446 | struct cpuset *cs = cgroup_cs(cont); | ||
| 1447 | 1435 | ||
| 1448 | /* | 1436 | return 0; |
| 1449 | * can_attach beforehand should guarantee that this doesn't fail. | ||
| 1450 | * TODO: have a better way to handle failure here | ||
| 1451 | */ | ||
| 1452 | err = set_cpus_allowed_ptr(tsk, cpus_attach); | ||
| 1453 | WARN_ON_ONCE(err); | ||
| 1454 | |||
| 1455 | cpuset_change_task_nodemask(tsk, &cpuset_attach_nodemask_to); | ||
| 1456 | cpuset_update_task_spread_flag(cs, tsk); | ||
| 1457 | } | 1437 | } |
| 1458 | 1438 | ||
| 1459 | static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont, | 1439 | static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, |
| 1460 | struct cgroup *oldcont, struct task_struct *tsk) | 1440 | struct cgroup_taskset *tset) |
| 1461 | { | 1441 | { |
| 1462 | struct mm_struct *mm; | 1442 | struct mm_struct *mm; |
| 1463 | struct cpuset *cs = cgroup_cs(cont); | 1443 | struct task_struct *task; |
| 1464 | struct cpuset *oldcs = cgroup_cs(oldcont); | 1444 | struct task_struct *leader = cgroup_taskset_first(tset); |
| 1445 | struct cgroup *oldcgrp = cgroup_taskset_cur_cgroup(tset); | ||
| 1446 | struct cpuset *cs = cgroup_cs(cgrp); | ||
| 1447 | struct cpuset *oldcs = cgroup_cs(oldcgrp); | ||
| 1448 | |||
| 1449 | cgroup_taskset_for_each(task, cgrp, tset) { | ||
| 1450 | /* | ||
| 1451 | * can_attach beforehand should guarantee that this doesn't | ||
| 1452 | * fail. TODO: have a better way to handle failure here | ||
| 1453 | */ | ||
| 1454 | WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach)); | ||
| 1455 | |||
| 1456 | cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to); | ||
| 1457 | cpuset_update_task_spread_flag(cs, task); | ||
| 1458 | } | ||
| 1465 | 1459 | ||
| 1466 | /* | 1460 | /* |
| 1467 | * Change mm, possibly for multiple threads in a threadgroup. This is | 1461 | * Change mm, possibly for multiple threads in a threadgroup. This is |
| @@ -1469,7 +1463,7 @@ static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont, | |||
| 1469 | */ | 1463 | */ |
| 1470 | cpuset_attach_nodemask_from = oldcs->mems_allowed; | 1464 | cpuset_attach_nodemask_from = oldcs->mems_allowed; |
| 1471 | cpuset_attach_nodemask_to = cs->mems_allowed; | 1465 | cpuset_attach_nodemask_to = cs->mems_allowed; |
| 1472 | mm = get_task_mm(tsk); | 1466 | mm = get_task_mm(leader); |
| 1473 | if (mm) { | 1467 | if (mm) { |
| 1474 | mpol_rebind_mm(mm, &cpuset_attach_nodemask_to); | 1468 | mpol_rebind_mm(mm, &cpuset_attach_nodemask_to); |
| 1475 | if (is_memory_migrate(cs)) | 1469 | if (is_memory_migrate(cs)) |
| @@ -1925,9 +1919,6 @@ struct cgroup_subsys cpuset_subsys = { | |||
| 1925 | .create = cpuset_create, | 1919 | .create = cpuset_create, |
| 1926 | .destroy = cpuset_destroy, | 1920 | .destroy = cpuset_destroy, |
| 1927 | .can_attach = cpuset_can_attach, | 1921 | .can_attach = cpuset_can_attach, |
| 1928 | .can_attach_task = cpuset_can_attach_task, | ||
| 1929 | .pre_attach = cpuset_pre_attach, | ||
| 1930 | .attach_task = cpuset_attach_task, | ||
| 1931 | .attach = cpuset_attach, | 1922 | .attach = cpuset_attach, |
| 1932 | .populate = cpuset_populate, | 1923 | .populate = cpuset_populate, |
| 1933 | .post_clone = cpuset_post_clone, | 1924 | .post_clone = cpuset_post_clone, |
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 63786e71a3cd..e2ae7349437f 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c | |||
| @@ -1982,7 +1982,7 @@ static int kdb_lsmod(int argc, const char **argv) | |||
| 1982 | kdb_printf("%-20s%8u 0x%p ", mod->name, | 1982 | kdb_printf("%-20s%8u 0x%p ", mod->name, |
| 1983 | mod->core_size, (void *)mod); | 1983 | mod->core_size, (void *)mod); |
| 1984 | #ifdef CONFIG_MODULE_UNLOAD | 1984 | #ifdef CONFIG_MODULE_UNLOAD |
| 1985 | kdb_printf("%4d ", module_refcount(mod)); | 1985 | kdb_printf("%4ld ", module_refcount(mod)); |
| 1986 | #endif | 1986 | #endif |
| 1987 | if (mod->state == MODULE_STATE_GOING) | 1987 | if (mod->state == MODULE_STATE_GOING) |
| 1988 | kdb_printf(" (Unloading)"); | 1988 | kdb_printf(" (Unloading)"); |
diff --git a/kernel/events/core.c b/kernel/events/core.c index 890eb02c2f21..a8f4ac001a00 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
| @@ -4,7 +4,7 @@ | |||
| 4 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> | 4 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> |
| 5 | * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar | 5 | * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar |
| 6 | * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | 6 | * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> |
| 7 | * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> | 7 | * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> |
| 8 | * | 8 | * |
| 9 | * For licensing details see kernel-base/COPYING | 9 | * For licensing details see kernel-base/COPYING |
| 10 | */ | 10 | */ |
| @@ -6941,10 +6941,13 @@ static int __perf_cgroup_move(void *info) | |||
| 6941 | return 0; | 6941 | return 0; |
| 6942 | } | 6942 | } |
| 6943 | 6943 | ||
| 6944 | static void | 6944 | static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, |
| 6945 | perf_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *task) | 6945 | struct cgroup_taskset *tset) |
| 6946 | { | 6946 | { |
| 6947 | task_function_call(task, __perf_cgroup_move, task); | 6947 | struct task_struct *task; |
| 6948 | |||
| 6949 | cgroup_taskset_for_each(task, cgrp, tset) | ||
| 6950 | task_function_call(task, __perf_cgroup_move, task); | ||
| 6948 | } | 6951 | } |
| 6949 | 6952 | ||
| 6950 | static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, | 6953 | static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, |
| @@ -6958,7 +6961,7 @@ static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, | |||
| 6958 | if (!(task->flags & PF_EXITING)) | 6961 | if (!(task->flags & PF_EXITING)) |
| 6959 | return; | 6962 | return; |
| 6960 | 6963 | ||
| 6961 | perf_cgroup_attach_task(cgrp, task); | 6964 | task_function_call(task, __perf_cgroup_move, task); |
| 6962 | } | 6965 | } |
| 6963 | 6966 | ||
| 6964 | struct cgroup_subsys perf_subsys = { | 6967 | struct cgroup_subsys perf_subsys = { |
| @@ -6967,6 +6970,6 @@ struct cgroup_subsys perf_subsys = { | |||
| 6967 | .create = perf_cgroup_create, | 6970 | .create = perf_cgroup_create, |
| 6968 | .destroy = perf_cgroup_destroy, | 6971 | .destroy = perf_cgroup_destroy, |
| 6969 | .exit = perf_cgroup_exit, | 6972 | .exit = perf_cgroup_exit, |
| 6970 | .attach_task = perf_cgroup_attach_task, | 6973 | .attach = perf_cgroup_attach, |
| 6971 | }; | 6974 | }; |
| 6972 | #endif /* CONFIG_CGROUP_PERF */ | 6975 | #endif /* CONFIG_CGROUP_PERF */ |
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 7f3011c6b57f..6ddaba43fb7a 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c | |||
| @@ -4,7 +4,7 @@ | |||
| 4 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> | 4 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> |
| 5 | * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar | 5 | * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar |
| 6 | * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | 6 | * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> |
| 7 | * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> | 7 | * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> |
| 8 | * | 8 | * |
| 9 | * For licensing details see kernel-base/COPYING | 9 | * For licensing details see kernel-base/COPYING |
| 10 | */ | 10 | */ |
diff --git a/kernel/exit.c b/kernel/exit.c index d579a459309d..294b1709170d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
| @@ -51,6 +51,7 @@ | |||
| 51 | #include <trace/events/sched.h> | 51 | #include <trace/events/sched.h> |
| 52 | #include <linux/hw_breakpoint.h> | 52 | #include <linux/hw_breakpoint.h> |
| 53 | #include <linux/oom.h> | 53 | #include <linux/oom.h> |
| 54 | #include <linux/writeback.h> | ||
| 54 | 55 | ||
| 55 | #include <asm/uaccess.h> | 56 | #include <asm/uaccess.h> |
| 56 | #include <asm/unistd.h> | 57 | #include <asm/unistd.h> |
| @@ -679,8 +680,6 @@ static void exit_mm(struct task_struct * tsk) | |||
| 679 | tsk->mm = NULL; | 680 | tsk->mm = NULL; |
| 680 | up_read(&mm->mmap_sem); | 681 | up_read(&mm->mmap_sem); |
| 681 | enter_lazy_tlb(mm, current); | 682 | enter_lazy_tlb(mm, current); |
| 682 | /* We don't want this task to be frozen prematurely */ | ||
| 683 | clear_freeze_flag(tsk); | ||
| 684 | task_unlock(tsk); | 683 | task_unlock(tsk); |
| 685 | mm_update_next_owner(mm); | 684 | mm_update_next_owner(mm); |
| 686 | mmput(mm); | 685 | mmput(mm); |
| @@ -888,7 +887,7 @@ static void check_stack_usage(void) | |||
| 888 | static inline void check_stack_usage(void) {} | 887 | static inline void check_stack_usage(void) {} |
| 889 | #endif | 888 | #endif |
| 890 | 889 | ||
| 891 | NORET_TYPE void do_exit(long code) | 890 | void do_exit(long code) |
| 892 | { | 891 | { |
| 893 | struct task_struct *tsk = current; | 892 | struct task_struct *tsk = current; |
| 894 | int group_dead; | 893 | int group_dead; |
| @@ -965,8 +964,7 @@ NORET_TYPE void do_exit(long code) | |||
| 965 | acct_collect(code, group_dead); | 964 | acct_collect(code, group_dead); |
| 966 | if (group_dead) | 965 | if (group_dead) |
| 967 | tty_audit_exit(); | 966 | tty_audit_exit(); |
| 968 | if (unlikely(tsk->audit_context)) | 967 | audit_free(tsk); |
| 969 | audit_free(tsk); | ||
| 970 | 968 | ||
| 971 | tsk->exit_code = code; | 969 | tsk->exit_code = code; |
| 972 | taskstats_exit(tsk, group_dead); | 970 | taskstats_exit(tsk, group_dead); |
| @@ -1037,9 +1035,12 @@ NORET_TYPE void do_exit(long code) | |||
| 1037 | validate_creds_for_do_exit(tsk); | 1035 | validate_creds_for_do_exit(tsk); |
| 1038 | 1036 | ||
| 1039 | preempt_disable(); | 1037 | preempt_disable(); |
| 1038 | if (tsk->nr_dirtied) | ||
| 1039 | __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied); | ||
| 1040 | exit_rcu(); | 1040 | exit_rcu(); |
| 1041 | /* causes final put_task_struct in finish_task_switch(). */ | 1041 | /* causes final put_task_struct in finish_task_switch(). */ |
| 1042 | tsk->state = TASK_DEAD; | 1042 | tsk->state = TASK_DEAD; |
| 1043 | tsk->flags |= PF_NOFREEZE; /* tell freezer to ignore us */ | ||
| 1043 | schedule(); | 1044 | schedule(); |
| 1044 | BUG(); | 1045 | BUG(); |
| 1045 | /* Avoid "noreturn function does return". */ | 1046 | /* Avoid "noreturn function does return". */ |
| @@ -1049,7 +1050,7 @@ NORET_TYPE void do_exit(long code) | |||
| 1049 | 1050 | ||
| 1050 | EXPORT_SYMBOL_GPL(do_exit); | 1051 | EXPORT_SYMBOL_GPL(do_exit); |
| 1051 | 1052 | ||
| 1052 | NORET_TYPE void complete_and_exit(struct completion *comp, long code) | 1053 | void complete_and_exit(struct completion *comp, long code) |
| 1053 | { | 1054 | { |
| 1054 | if (comp) | 1055 | if (comp) |
| 1055 | complete(comp); | 1056 | complete(comp); |
| @@ -1068,7 +1069,7 @@ SYSCALL_DEFINE1(exit, int, error_code) | |||
| 1068 | * Take down every thread in the group. This is called by fatal signals | 1069 | * Take down every thread in the group. This is called by fatal signals |
| 1069 | * as well as by sys_exit_group (below). | 1070 | * as well as by sys_exit_group (below). |
| 1070 | */ | 1071 | */ |
| 1071 | NORET_TYPE void | 1072 | void |
| 1072 | do_group_exit(int exit_code) | 1073 | do_group_exit(int exit_code) |
| 1073 | { | 1074 | { |
| 1074 | struct signal_struct *sig = current->signal; | 1075 | struct signal_struct *sig = current->signal; |
diff --git a/kernel/fork.c b/kernel/fork.c index b058c5820ecd..051f090d40c1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -76,6 +76,9 @@ | |||
| 76 | 76 | ||
| 77 | #include <trace/events/sched.h> | 77 | #include <trace/events/sched.h> |
| 78 | 78 | ||
| 79 | #define CREATE_TRACE_POINTS | ||
| 80 | #include <trace/events/task.h> | ||
| 81 | |||
| 79 | /* | 82 | /* |
| 80 | * Protected counters by write_lock_irq(&tasklist_lock) | 83 | * Protected counters by write_lock_irq(&tasklist_lock) |
| 81 | */ | 84 | */ |
| @@ -870,6 +873,7 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk) | |||
| 870 | { | 873 | { |
| 871 | #ifdef CONFIG_BLOCK | 874 | #ifdef CONFIG_BLOCK |
| 872 | struct io_context *ioc = current->io_context; | 875 | struct io_context *ioc = current->io_context; |
| 876 | struct io_context *new_ioc; | ||
| 873 | 877 | ||
| 874 | if (!ioc) | 878 | if (!ioc) |
| 875 | return 0; | 879 | return 0; |
| @@ -881,11 +885,12 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk) | |||
| 881 | if (unlikely(!tsk->io_context)) | 885 | if (unlikely(!tsk->io_context)) |
| 882 | return -ENOMEM; | 886 | return -ENOMEM; |
| 883 | } else if (ioprio_valid(ioc->ioprio)) { | 887 | } else if (ioprio_valid(ioc->ioprio)) { |
| 884 | tsk->io_context = alloc_io_context(GFP_KERNEL, -1); | 888 | new_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE); |
| 885 | if (unlikely(!tsk->io_context)) | 889 | if (unlikely(!new_ioc)) |
| 886 | return -ENOMEM; | 890 | return -ENOMEM; |
| 887 | 891 | ||
| 888 | tsk->io_context->ioprio = ioc->ioprio; | 892 | new_ioc->ioprio = ioc->ioprio; |
| 893 | put_io_context(new_ioc, NULL); | ||
| 889 | } | 894 | } |
| 890 | #endif | 895 | #endif |
| 891 | return 0; | 896 | return 0; |
| @@ -972,7 +977,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
| 972 | sched_autogroup_fork(sig); | 977 | sched_autogroup_fork(sig); |
| 973 | 978 | ||
| 974 | #ifdef CONFIG_CGROUPS | 979 | #ifdef CONFIG_CGROUPS |
| 975 | init_rwsem(&sig->threadgroup_fork_lock); | 980 | init_rwsem(&sig->group_rwsem); |
| 976 | #endif | 981 | #endif |
| 977 | 982 | ||
| 978 | sig->oom_adj = current->signal->oom_adj; | 983 | sig->oom_adj = current->signal->oom_adj; |
| @@ -992,7 +997,6 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p) | |||
| 992 | new_flags |= PF_FORKNOEXEC; | 997 | new_flags |= PF_FORKNOEXEC; |
| 993 | new_flags |= PF_STARTING; | 998 | new_flags |= PF_STARTING; |
| 994 | p->flags = new_flags; | 999 | p->flags = new_flags; |
| 995 | clear_freeze_flag(p); | ||
| 996 | } | 1000 | } |
| 997 | 1001 | ||
| 998 | SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) | 1002 | SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) |
| @@ -1154,7 +1158,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 1154 | p->io_context = NULL; | 1158 | p->io_context = NULL; |
| 1155 | p->audit_context = NULL; | 1159 | p->audit_context = NULL; |
| 1156 | if (clone_flags & CLONE_THREAD) | 1160 | if (clone_flags & CLONE_THREAD) |
| 1157 | threadgroup_fork_read_lock(current); | 1161 | threadgroup_change_begin(current); |
| 1158 | cgroup_fork(p); | 1162 | cgroup_fork(p); |
| 1159 | #ifdef CONFIG_NUMA | 1163 | #ifdef CONFIG_NUMA |
| 1160 | p->mempolicy = mpol_dup(p->mempolicy); | 1164 | p->mempolicy = mpol_dup(p->mempolicy); |
| @@ -1292,6 +1296,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 1292 | 1296 | ||
| 1293 | p->nr_dirtied = 0; | 1297 | p->nr_dirtied = 0; |
| 1294 | p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10); | 1298 | p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10); |
| 1299 | p->dirty_paused_when = 0; | ||
| 1295 | 1300 | ||
| 1296 | /* | 1301 | /* |
| 1297 | * Ok, make it visible to the rest of the system. | 1302 | * Ok, make it visible to the rest of the system. |
| @@ -1369,8 +1374,11 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 1369 | proc_fork_connector(p); | 1374 | proc_fork_connector(p); |
| 1370 | cgroup_post_fork(p); | 1375 | cgroup_post_fork(p); |
| 1371 | if (clone_flags & CLONE_THREAD) | 1376 | if (clone_flags & CLONE_THREAD) |
| 1372 | threadgroup_fork_read_unlock(current); | 1377 | threadgroup_change_end(current); |
| 1373 | perf_event_fork(p); | 1378 | perf_event_fork(p); |
| 1379 | |||
| 1380 | trace_task_newtask(p, clone_flags); | ||
| 1381 | |||
| 1374 | return p; | 1382 | return p; |
| 1375 | 1383 | ||
| 1376 | bad_fork_free_pid: | 1384 | bad_fork_free_pid: |
| @@ -1404,7 +1412,7 @@ bad_fork_cleanup_policy: | |||
| 1404 | bad_fork_cleanup_cgroup: | 1412 | bad_fork_cleanup_cgroup: |
| 1405 | #endif | 1413 | #endif |
| 1406 | if (clone_flags & CLONE_THREAD) | 1414 | if (clone_flags & CLONE_THREAD) |
| 1407 | threadgroup_fork_read_unlock(current); | 1415 | threadgroup_change_end(current); |
| 1408 | cgroup_exit(p, cgroup_callbacks_done); | 1416 | cgroup_exit(p, cgroup_callbacks_done); |
| 1409 | delayacct_tsk_free(p); | 1417 | delayacct_tsk_free(p); |
| 1410 | module_put(task_thread_info(p)->exec_domain->module); | 1418 | module_put(task_thread_info(p)->exec_domain->module); |
| @@ -1519,8 +1527,6 @@ long do_fork(unsigned long clone_flags, | |||
| 1519 | init_completion(&vfork); | 1527 | init_completion(&vfork); |
| 1520 | } | 1528 | } |
| 1521 | 1529 | ||
| 1522 | audit_finish_fork(p); | ||
| 1523 | |||
| 1524 | /* | 1530 | /* |
| 1525 | * We set PF_STARTING at creation in case tracing wants to | 1531 | * We set PF_STARTING at creation in case tracing wants to |
| 1526 | * use this to distinguish a fully live task from one that | 1532 | * use this to distinguish a fully live task from one that |
diff --git a/kernel/freezer.c b/kernel/freezer.c index 7be56c534397..9815b8d1eed5 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c | |||
| @@ -9,101 +9,114 @@ | |||
| 9 | #include <linux/export.h> | 9 | #include <linux/export.h> |
| 10 | #include <linux/syscalls.h> | 10 | #include <linux/syscalls.h> |
| 11 | #include <linux/freezer.h> | 11 | #include <linux/freezer.h> |
| 12 | #include <linux/kthread.h> | ||
| 12 | 13 | ||
| 13 | /* | 14 | /* total number of freezing conditions in effect */ |
| 14 | * freezing is complete, mark current process as frozen | 15 | atomic_t system_freezing_cnt = ATOMIC_INIT(0); |
| 16 | EXPORT_SYMBOL(system_freezing_cnt); | ||
| 17 | |||
| 18 | /* indicate whether PM freezing is in effect, protected by pm_mutex */ | ||
| 19 | bool pm_freezing; | ||
| 20 | bool pm_nosig_freezing; | ||
| 21 | |||
| 22 | /* protects freezing and frozen transitions */ | ||
| 23 | static DEFINE_SPINLOCK(freezer_lock); | ||
| 24 | |||
| 25 | /** | ||
| 26 | * freezing_slow_path - slow path for testing whether a task needs to be frozen | ||
| 27 | * @p: task to be tested | ||
| 28 | * | ||
| 29 | * This function is called by freezing() if system_freezing_cnt isn't zero | ||
| 30 | * and tests whether @p needs to enter and stay in frozen state. Can be | ||
| 31 | * called under any context. The freezers are responsible for ensuring the | ||
| 32 | * target tasks see the updated state. | ||
| 15 | */ | 33 | */ |
| 16 | static inline void frozen_process(void) | 34 | bool freezing_slow_path(struct task_struct *p) |
| 17 | { | 35 | { |
| 18 | if (!unlikely(current->flags & PF_NOFREEZE)) { | 36 | if (p->flags & PF_NOFREEZE) |
| 19 | current->flags |= PF_FROZEN; | 37 | return false; |
| 20 | smp_wmb(); | 38 | |
| 21 | } | 39 | if (pm_nosig_freezing || cgroup_freezing(p)) |
| 22 | clear_freeze_flag(current); | 40 | return true; |
| 41 | |||
| 42 | if (pm_freezing && !(p->flags & PF_KTHREAD)) | ||
| 43 | return true; | ||
| 44 | |||
| 45 | return false; | ||
| 23 | } | 46 | } |
| 47 | EXPORT_SYMBOL(freezing_slow_path); | ||
| 24 | 48 | ||
| 25 | /* Refrigerator is place where frozen processes are stored :-). */ | 49 | /* Refrigerator is place where frozen processes are stored :-). */ |
| 26 | void refrigerator(void) | 50 | bool __refrigerator(bool check_kthr_stop) |
| 27 | { | 51 | { |
| 28 | /* Hmm, should we be allowed to suspend when there are realtime | 52 | /* Hmm, should we be allowed to suspend when there are realtime |
| 29 | processes around? */ | 53 | processes around? */ |
| 30 | long save; | 54 | bool was_frozen = false; |
| 55 | long save = current->state; | ||
| 31 | 56 | ||
| 32 | task_lock(current); | ||
| 33 | if (freezing(current)) { | ||
| 34 | frozen_process(); | ||
| 35 | task_unlock(current); | ||
| 36 | } else { | ||
| 37 | task_unlock(current); | ||
| 38 | return; | ||
| 39 | } | ||
| 40 | save = current->state; | ||
| 41 | pr_debug("%s entered refrigerator\n", current->comm); | 57 | pr_debug("%s entered refrigerator\n", current->comm); |
| 42 | 58 | ||
| 43 | spin_lock_irq(&current->sighand->siglock); | |||
| 44 | recalc_sigpending(); /* We sent fake signal, clean it up */ | ||
| 45 | spin_unlock_irq(&current->sighand->siglock); | |||
| 46 | |||
| 47 | /* prevent accounting of that task to load */ | ||
| 48 | current->flags |= PF_FREEZING; | ||
| 49 | |||
| 50 | for (;;) { | 59 | for (;;) { |
| 51 | set_current_state(TASK_UNINTERRUPTIBLE); | 60 | set_current_state(TASK_UNINTERRUPTIBLE); |
| 52 | if (!frozen(current)) | 61 | |
| 62 | spin_lock_irq(&freezer_lock); | ||
| 63 | current->flags |= PF_FROZEN; | ||
| 64 | if (!freezing(current) || | ||
| 65 | (check_kthr_stop && kthread_should_stop())) | ||
| 66 | current->flags &= ~PF_FROZEN; | ||
| 67 | spin_unlock_irq(&freezer_lock); | ||
| 68 | |||
| 69 | if (!(current->flags & PF_FROZEN)) | ||
| 53 | break; | 70 | break; |
| 71 | was_frozen = true; | ||
| 54 | schedule(); | 72 | schedule(); |
| 55 | } | 73 | } |
| 56 | 74 | ||
| 57 | /* Remove the accounting blocker */ | ||
| 58 | current->flags &= ~PF_FREEZING; | ||
| 59 | |||
| 60 | pr_debug("%s left refrigerator\n", current->comm); | 75 | pr_debug("%s left refrigerator\n", current->comm); |
| 61 | __set_current_state(save); | 76 | |
| 77 | /* | ||
| 78 | * Restore saved task state before returning. The mb'd version | ||
| 79 | * needs to be used; otherwise, it might silently break | ||
| 80 | * synchronization which depends on ordered task state change. | ||
| 81 | */ | ||
| 82 | set_current_state(save); | ||
| 83 | |||
| 84 | return was_frozen; | ||
| 62 | } | 85 | } |
| 63 | EXPORT_SYMBOL(refrigerator); | 86 | EXPORT_SYMBOL(__refrigerator); |
| 64 | 87 | ||
| 65 | static void fake_signal_wake_up(struct task_struct *p) | 88 | static void fake_signal_wake_up(struct task_struct *p) |
| 66 | { | 89 | { |
| 67 | unsigned long flags; | 90 | unsigned long flags; |
| 68 | 91 | ||
| 69 | spin_lock_irqsave(&p->sighand->siglock, flags); | 92 | if (lock_task_sighand(p, &flags)) { |
| 70 | signal_wake_up(p, 0); | 93 | signal_wake_up(p, 0); |
| 71 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | 94 | unlock_task_sighand(p, &flags); |
| 95 | } | ||
| 72 | } | 96 | } |
| 73 | 97 | ||
| 74 | /** | 98 | /** |
| 75 | * freeze_task - send a freeze request to given task | 99 | * freeze_task - send a freeze request to given task |
| 76 | * @p: task to send the request to | 100 | * @p: task to send the request to |
| 77 | * @sig_only: if set, the request will only be sent if the task has the | 101 | * |
| 78 | * PF_FREEZER_NOSIG flag unset | 102 | * If @p is freezing, the freeze request is sent by setting %TIF_FREEZE |
| 79 | * Return value: 'false', if @sig_only is set and the task has | 103 | * flag and either sending a fake signal to it or waking it up, depending |
| 80 | * PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise | 104 | * on whether it has %PF_FREEZER_NOSIG set. |
| 81 | * | 105 | * |
| 82 | * The freeze request is sent by setting the tasks's TIF_FREEZE flag and | 106 | * RETURNS: |
| 83 | * either sending a fake signal to it or waking it up, depending on whether | 107 | * %false, if @p is not freezing or already frozen; %true, otherwise |
| 84 | * or not it has PF_FREEZER_NOSIG set. If @sig_only is set and the task | ||
| 85 | * has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its | ||
| 86 | * TIF_FREEZE flag will not be set. | ||
| 87 | */ | 108 | */ |
| 88 | bool freeze_task(struct task_struct *p, bool sig_only) | 109 | bool freeze_task(struct task_struct *p) |
| 89 | { | 110 | { |
| 90 | /* | 111 | unsigned long flags; |
| 91 | * We first check if the task is freezing and next if it has already | 112 | |
| 92 | * been frozen to avoid the race with frozen_process() which first marks | 113 | spin_lock_irqsave(&freezer_lock, flags); |
| 93 | * the task as frozen and next clears its TIF_FREEZE. | 114 | if (!freezing(p) || frozen(p)) { |
| 94 | */ | 115 | spin_unlock_irqrestore(&freezer_lock, flags); |
| 95 | if (!freezing(p)) { | 116 | return false; |
| 96 | smp_rmb(); | ||
| 97 | if (frozen(p)) | ||
| 98 | return false; | ||
| 99 | |||
| 100 | if (!sig_only || should_send_signal(p)) | ||
| 101 | set_freeze_flag(p); | ||
| 102 | else | ||
| 103 | return false; | ||
| 104 | } | 117 | } |
| 105 | 118 | ||
| 106 | if (should_send_signal(p)) { | 119 | if (!(p->flags & PF_KTHREAD)) { |
| 107 | fake_signal_wake_up(p); | 120 | fake_signal_wake_up(p); |
| 108 | /* | 121 | /* |
| 109 | * fake_signal_wake_up() goes through p's scheduler | 122 | * fake_signal_wake_up() goes through p's scheduler |
| @@ -111,56 +124,48 @@ bool freeze_task(struct task_struct *p, bool sig_only) | |||
| 111 | * TASK_RUNNING transition can't race with task state | 124 | * TASK_RUNNING transition can't race with task state |
| 112 | * testing in try_to_freeze_tasks(). | 125 | * testing in try_to_freeze_tasks(). |
| 113 | */ | 126 | */ |
| 114 | } else if (sig_only) { | ||
| 115 | return false; | ||
| 116 | } else { | 127 | } else { |
| 117 | wake_up_state(p, TASK_INTERRUPTIBLE); | 128 | wake_up_state(p, TASK_INTERRUPTIBLE); |
| 118 | } | 129 | } |
| 119 | 130 | ||
| 131 | spin_unlock_irqrestore(&freezer_lock, flags); | ||
| 120 | return true; | 132 | return true; |
| 121 | } | 133 | } |
| 122 | 134 | ||
| 123 | void cancel_freezing(struct task_struct *p) | 135 | void __thaw_task(struct task_struct *p) |
| 124 | { | 136 | { |
| 125 | unsigned long flags; | 137 | unsigned long flags; |
| 126 | 138 | ||
| 127 | if (freezing(p)) { | 139 | /* |
| 128 | pr_debug(" clean up: %s\n", p->comm); | 140 | * Clear freezing and kick @p if FROZEN. Clearing is guaranteed to |
| 129 | clear_freeze_flag(p); | 141 | * be visible to @p as waking up implies wmb. Waking up inside |
| 130 | spin_lock_irqsave(&p->sighand->siglock, flags); | 142 | * freezer_lock also prevents wakeups from leaking outside |
| 131 | recalc_sigpending_and_wake(p); | 143 | * refrigerator. |
| 132 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | 144 | */ |
| 133 | } | 145 | spin_lock_irqsave(&freezer_lock, flags); |
| 134 | } | 146 | if (frozen(p)) |
| 135 | 147 | wake_up_process(p); | |
| 136 | static int __thaw_process(struct task_struct *p) | 148 | spin_unlock_irqrestore(&freezer_lock, flags); |
| 137 | { | ||
| 138 | if (frozen(p)) { | ||
| 139 | p->flags &= ~PF_FROZEN; | ||
| 140 | return 1; | ||
| 141 | } | ||
| 142 | clear_freeze_flag(p); | ||
| 143 | return 0; | ||
| 144 | } | 149 | } |
| 145 | 150 | ||
| 146 | /* | 151 | /** |
| 147 | * Wake up a frozen process | 152 | * set_freezable - make %current freezable |
| 148 | * | 153 | * |
| 149 | * task_lock() is needed to prevent the race with refrigerator() which may | 154 | * Mark %current freezable and enter refrigerator if necessary. |
| 150 | * occur if the freezing of tasks fails. Namely, without the lock, if the | ||
| 151 | * freezing of tasks failed, thaw_tasks() might have run before a task in | ||
| 152 | * refrigerator() could call frozen_process(), in which case the task would be | ||
| 153 | * frozen and no one would thaw it. | ||
| 154 | */ | 155 | */ |
| 155 | int thaw_process(struct task_struct *p) | 156 | bool set_freezable(void) |
| 156 | { | 157 | { |
| 157 | task_lock(p); | 158 | might_sleep(); |
| 158 | if (__thaw_process(p) == 1) { | 159 | |
| 159 | task_unlock(p); | 160 | /* |
| 160 | wake_up_process(p); | 161 | * Modify flags while holding freezer_lock. This ensures the |
| 161 | return 1; | 162 | * freezer notices that we aren't frozen yet or the freezing |
| 162 | } | 163 | * condition is visible to try_to_freeze() below. |
| 163 | task_unlock(p); | 164 | */ |
| 164 | return 0; | 165 | spin_lock_irq(&freezer_lock); |
| 166 | current->flags &= ~PF_NOFREEZE; | ||
| 167 | spin_unlock_irq(&freezer_lock); | ||
| 168 | |||
| 169 | return try_to_freeze(); | ||
| 165 | } | 170 | } |
| 166 | EXPORT_SYMBOL(thaw_process); | 171 | EXPORT_SYMBOL(set_freezable); |
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index a73dd6c7372d..b7952316016a 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h | |||
| @@ -15,7 +15,7 @@ | |||
| 15 | 15 | ||
| 16 | #define istate core_internal_state__do_not_mess_with_it | 16 | #define istate core_internal_state__do_not_mess_with_it |
| 17 | 17 | ||
| 18 | extern int noirqdebug; | 18 | extern bool noirqdebug; |
| 19 | 19 | ||
| 20 | /* | 20 | /* |
| 21 | * Bits used by threaded handlers: | 21 | * Bits used by threaded handlers: |
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 200ce832c585..1f9e26526b69 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c | |||
| @@ -135,6 +135,9 @@ int irq_domain_simple_dt_translate(struct irq_domain *d, | |||
| 135 | return -EINVAL; | 135 | return -EINVAL; |
| 136 | if (intsize < 1) | 136 | if (intsize < 1) |
| 137 | return -EINVAL; | 137 | return -EINVAL; |
| 138 | if (d->nr_irq && ((intspec[0] < d->hwirq_base) || | ||
| 139 | (intspec[0] >= d->hwirq_base + d->nr_irq))) | ||
| 140 | return -EINVAL; | ||
| 138 | 141 | ||
| 139 | *out_hwirq = intspec[0]; | 142 | *out_hwirq = intspec[0]; |
| 140 | *out_type = IRQ_TYPE_NONE; | 143 | *out_type = IRQ_TYPE_NONE; |
| @@ -143,11 +146,6 @@ int irq_domain_simple_dt_translate(struct irq_domain *d, | |||
| 143 | return 0; | 146 | return 0; |
| 144 | } | 147 | } |
| 145 | 148 | ||
| 146 | struct irq_domain_ops irq_domain_simple_ops = { | ||
| 147 | .dt_translate = irq_domain_simple_dt_translate, | ||
| 148 | }; | ||
| 149 | EXPORT_SYMBOL_GPL(irq_domain_simple_ops); | ||
| 150 | |||
| 151 | /** | 149 | /** |
| 152 | * irq_domain_create_simple() - Set up a 'simple' translation range | 150 | * irq_domain_create_simple() - Set up a 'simple' translation range |
| 153 | */ | 151 | */ |
| @@ -182,3 +180,10 @@ void irq_domain_generate_simple(const struct of_device_id *match, | |||
| 182 | } | 180 | } |
| 183 | EXPORT_SYMBOL_GPL(irq_domain_generate_simple); | 181 | EXPORT_SYMBOL_GPL(irq_domain_generate_simple); |
| 184 | #endif /* CONFIG_OF_IRQ */ | 182 | #endif /* CONFIG_OF_IRQ */ |
| 183 | |||
| 184 | struct irq_domain_ops irq_domain_simple_ops = { | ||
| 185 | #ifdef CONFIG_OF_IRQ | ||
| 186 | .dt_translate = irq_domain_simple_dt_translate, | ||
| 187 | #endif /* CONFIG_OF_IRQ */ | ||
| 188 | }; | ||
| 189 | EXPORT_SYMBOL_GPL(irq_domain_simple_ops); | ||
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 1da999f5e746..a9a9dbe49fea 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
| @@ -1292,7 +1292,7 @@ EXPORT_SYMBOL(free_irq); | |||
| 1292 | * and to set up the interrupt handler in the right order. | 1292 | * and to set up the interrupt handler in the right order. |
| 1293 | * | 1293 | * |
| 1294 | * If you want to set up a threaded irq handler for your device | 1294 | * If you want to set up a threaded irq handler for your device |
| 1295 | * then you need to supply @handler and @thread_fn. @handler ist | 1295 | * then you need to supply @handler and @thread_fn. @handler is |
| 1296 | * still called in hard interrupt context and has to check | 1296 | * still called in hard interrupt context and has to check |
| 1297 | * whether the interrupt originates from the device. If yes it | 1297 | * whether the interrupt originates from the device. If yes it |
| 1298 | * needs to disable the interrupt on the device and return | 1298 | * needs to disable the interrupt on the device and return |
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index dc813a948be2..611cd6003c45 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c | |||
| @@ -325,7 +325,7 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc, | |||
| 325 | desc->irqs_unhandled = 0; | 325 | desc->irqs_unhandled = 0; |
| 326 | } | 326 | } |
| 327 | 327 | ||
| 328 | int noirqdebug __read_mostly; | 328 | bool noirqdebug __read_mostly; |
| 329 | 329 | ||
| 330 | int noirqdebug_setup(char *str) | 330 | int noirqdebug_setup(char *str) |
| 331 | { | 331 | { |
diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 30c3c7708132..01d3b70fc98a 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c | |||
| @@ -71,6 +71,7 @@ void jump_label_inc(struct jump_label_key *key) | |||
| 71 | atomic_inc(&key->enabled); | 71 | atomic_inc(&key->enabled); |
| 72 | jump_label_unlock(); | 72 | jump_label_unlock(); |
| 73 | } | 73 | } |
| 74 | EXPORT_SYMBOL_GPL(jump_label_inc); | ||
| 74 | 75 | ||
| 75 | static void __jump_label_dec(struct jump_label_key *key, | 76 | static void __jump_label_dec(struct jump_label_key *key, |
| 76 | unsigned long rate_limit, struct delayed_work *work) | 77 | unsigned long rate_limit, struct delayed_work *work) |
| @@ -86,6 +87,7 @@ static void __jump_label_dec(struct jump_label_key *key, | |||
| 86 | 87 | ||
| 87 | jump_label_unlock(); | 88 | jump_label_unlock(); |
| 88 | } | 89 | } |
| 90 | EXPORT_SYMBOL_GPL(jump_label_dec); | ||
| 89 | 91 | ||
| 90 | static void jump_label_update_timeout(struct work_struct *work) | 92 | static void jump_label_update_timeout(struct work_struct *work) |
| 91 | { | 93 | { |
diff --git a/kernel/kexec.c b/kernel/kexec.c index dc7bc0829286..7b0886786701 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
| @@ -32,7 +32,6 @@ | |||
| 32 | #include <linux/console.h> | 32 | #include <linux/console.h> |
| 33 | #include <linux/vmalloc.h> | 33 | #include <linux/vmalloc.h> |
| 34 | #include <linux/swap.h> | 34 | #include <linux/swap.h> |
| 35 | #include <linux/kmsg_dump.h> | ||
| 36 | #include <linux/syscore_ops.h> | 35 | #include <linux/syscore_ops.h> |
| 37 | 36 | ||
| 38 | #include <asm/page.h> | 37 | #include <asm/page.h> |
| @@ -1094,8 +1093,6 @@ void crash_kexec(struct pt_regs *regs) | |||
| 1094 | if (kexec_crash_image) { | 1093 | if (kexec_crash_image) { |
| 1095 | struct pt_regs fixed_regs; | 1094 | struct pt_regs fixed_regs; |
| 1096 | 1095 | ||
| 1097 | kmsg_dump(KMSG_DUMP_KEXEC); | ||
| 1098 | |||
| 1099 | crash_setup_regs(&fixed_regs, regs); | 1096 | crash_setup_regs(&fixed_regs, regs); |
| 1100 | crash_save_vmcoreinfo(); | 1097 | crash_save_vmcoreinfo(); |
| 1101 | machine_crash_shutdown(&fixed_regs); | 1098 | machine_crash_shutdown(&fixed_regs); |
| @@ -1132,6 +1129,8 @@ int crash_shrink_memory(unsigned long new_size) | |||
| 1132 | { | 1129 | { |
| 1133 | int ret = 0; | 1130 | int ret = 0; |
| 1134 | unsigned long start, end; | 1131 | unsigned long start, end; |
| 1132 | unsigned long old_size; | ||
| 1133 | struct resource *ram_res; | ||
| 1135 | 1134 | ||
| 1136 | mutex_lock(&kexec_mutex); | 1135 | mutex_lock(&kexec_mutex); |
| 1137 | 1136 | ||
| @@ -1141,11 +1140,15 @@ int crash_shrink_memory(unsigned long new_size) | |||
| 1141 | } | 1140 | } |
| 1142 | start = crashk_res.start; | 1141 | start = crashk_res.start; |
| 1143 | end = crashk_res.end; | 1142 | end = crashk_res.end; |
| 1143 | old_size = (end == 0) ? 0 : end - start + 1; | ||
| 1144 | if (new_size >= old_size) { | ||
| 1145 | ret = (new_size == old_size) ? 0 : -EINVAL; | ||
| 1146 | goto unlock; | ||
| 1147 | } | ||
| 1144 | 1148 | ||
| 1145 | if (new_size >= end - start + 1) { | 1149 | ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL); |
| 1146 | ret = -EINVAL; | 1150 | if (!ram_res) { |
| 1147 | if (new_size == end - start + 1) | 1151 | ret = -ENOMEM; |
| 1148 | ret = 0; | ||
| 1149 | goto unlock; | 1152 | goto unlock; |
| 1150 | } | 1153 | } |
| 1151 | 1154 | ||
| @@ -1157,7 +1160,15 @@ int crash_shrink_memory(unsigned long new_size) | |||
| 1157 | 1160 | ||
| 1158 | if ((start == end) && (crashk_res.parent != NULL)) | 1161 | if ((start == end) && (crashk_res.parent != NULL)) |
| 1159 | release_resource(&crashk_res); | 1162 | release_resource(&crashk_res); |
| 1163 | |||
| 1164 | ram_res->start = end; | ||
| 1165 | ram_res->end = crashk_res.end; | ||
| 1166 | ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; | ||
| 1167 | ram_res->name = "System RAM"; | ||
| 1168 | |||
| 1160 | crashk_res.end = end - 1; | 1169 | crashk_res.end = end - 1; |
| 1170 | |||
| 1171 | insert_resource(&iomem_resource, ram_res); | ||
| 1161 | crash_unmap_reserved_pages(); | 1172 | crash_unmap_reserved_pages(); |
| 1162 | 1173 | ||
| 1163 | unlock: | 1174 | unlock: |
| @@ -1523,7 +1534,7 @@ int kernel_kexec(void) | |||
| 1523 | 1534 | ||
| 1524 | #ifdef CONFIG_KEXEC_JUMP | 1535 | #ifdef CONFIG_KEXEC_JUMP |
| 1525 | if (kexec_image->preserve_context) { | 1536 | if (kexec_image->preserve_context) { |
| 1526 | mutex_lock(&pm_mutex); | 1537 | lock_system_sleep(); |
| 1527 | pm_prepare_console(); | 1538 | pm_prepare_console(); |
| 1528 | error = freeze_processes(); | 1539 | error = freeze_processes(); |
| 1529 | if (error) { | 1540 | if (error) { |
| @@ -1576,7 +1587,7 @@ int kernel_kexec(void) | |||
| 1576 | thaw_processes(); | 1587 | thaw_processes(); |
| 1577 | Restore_console: | 1588 | Restore_console: |
| 1578 | pm_restore_console(); | 1589 | pm_restore_console(); |
| 1579 | mutex_unlock(&pm_mutex); | 1590 | unlock_system_sleep(); |
| 1580 | } | 1591 | } |
| 1581 | #endif | 1592 | #endif |
| 1582 | 1593 | ||
diff --git a/kernel/kmod.c b/kernel/kmod.c index a4bea97c75b6..a0a88543934e 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c | |||
| @@ -36,6 +36,7 @@ | |||
| 36 | #include <linux/resource.h> | 36 | #include <linux/resource.h> |
| 37 | #include <linux/notifier.h> | 37 | #include <linux/notifier.h> |
| 38 | #include <linux/suspend.h> | 38 | #include <linux/suspend.h> |
| 39 | #include <linux/rwsem.h> | ||
| 39 | #include <asm/uaccess.h> | 40 | #include <asm/uaccess.h> |
| 40 | 41 | ||
| 41 | #include <trace/events/module.h> | 42 | #include <trace/events/module.h> |
| @@ -50,6 +51,7 @@ static struct workqueue_struct *khelper_wq; | |||
| 50 | static kernel_cap_t usermodehelper_bset = CAP_FULL_SET; | 51 | static kernel_cap_t usermodehelper_bset = CAP_FULL_SET; |
| 51 | static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET; | 52 | static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET; |
| 52 | static DEFINE_SPINLOCK(umh_sysctl_lock); | 53 | static DEFINE_SPINLOCK(umh_sysctl_lock); |
| 54 | static DECLARE_RWSEM(umhelper_sem); | ||
| 53 | 55 | ||
| 54 | #ifdef CONFIG_MODULES | 56 | #ifdef CONFIG_MODULES |
| 55 | 57 | ||
| @@ -275,6 +277,7 @@ static void __call_usermodehelper(struct work_struct *work) | |||
| 275 | * If set, call_usermodehelper_exec() will exit immediately returning -EBUSY | 277 | * If set, call_usermodehelper_exec() will exit immediately returning -EBUSY |
| 276 | * (used for preventing user land processes from being created after the user | 278 | * (used for preventing user land processes from being created after the user |
| 277 | * land has been frozen during a system-wide hibernation or suspend operation). | 279 | * land has been frozen during a system-wide hibernation or suspend operation). |
| 280 | * Should always be manipulated under umhelper_sem acquired for write. | ||
| 278 | */ | 281 | */ |
| 279 | static int usermodehelper_disabled = 1; | 282 | static int usermodehelper_disabled = 1; |
| 280 | 283 | ||
| @@ -282,17 +285,29 @@ static int usermodehelper_disabled = 1; | |||
| 282 | static atomic_t running_helpers = ATOMIC_INIT(0); | 285 | static atomic_t running_helpers = ATOMIC_INIT(0); |
| 283 | 286 | ||
| 284 | /* | 287 | /* |
| 285 | * Wait queue head used by usermodehelper_pm_callback() to wait for all running | 288 | * Wait queue head used by usermodehelper_disable() to wait for all running |
| 286 | * helpers to finish. | 289 | * helpers to finish. |
| 287 | */ | 290 | */ |
| 288 | static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq); | 291 | static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq); |
| 289 | 292 | ||
| 290 | /* | 293 | /* |
| 291 | * Time to wait for running_helpers to become zero before the setting of | 294 | * Time to wait for running_helpers to become zero before the setting of |
| 292 | * usermodehelper_disabled in usermodehelper_pm_callback() fails | 295 | * usermodehelper_disabled in usermodehelper_disable() fails |
| 293 | */ | 296 | */ |
| 294 | #define RUNNING_HELPERS_TIMEOUT (5 * HZ) | 297 | #define RUNNING_HELPERS_TIMEOUT (5 * HZ) |
| 295 | 298 | ||
| 299 | void read_lock_usermodehelper(void) | ||
| 300 | { | ||
| 301 | down_read(&umhelper_sem); | ||
| 302 | } | ||
| 303 | EXPORT_SYMBOL_GPL(read_lock_usermodehelper); | ||
| 304 | |||
| 305 | void read_unlock_usermodehelper(void) | ||
| 306 | { | ||
| 307 | up_read(&umhelper_sem); | ||
| 308 | } | ||
| 309 | EXPORT_SYMBOL_GPL(read_unlock_usermodehelper); | ||
| 310 | |||
| 296 | /** | 311 | /** |
| 297 | * usermodehelper_disable - prevent new helpers from being started | 312 | * usermodehelper_disable - prevent new helpers from being started |
| 298 | */ | 313 | */ |
| @@ -300,8 +315,10 @@ int usermodehelper_disable(void) | |||
| 300 | { | 315 | { |
| 301 | long retval; | 316 | long retval; |
| 302 | 317 | ||
| 318 | down_write(&umhelper_sem); | ||
| 303 | usermodehelper_disabled = 1; | 319 | usermodehelper_disabled = 1; |
| 304 | smp_mb(); | 320 | up_write(&umhelper_sem); |
| 321 | |||
| 305 | /* | 322 | /* |
| 306 | * From now on call_usermodehelper_exec() won't start any new | 323 | * From now on call_usermodehelper_exec() won't start any new |
| 307 | * helpers, so it is sufficient if running_helpers turns out to | 324 | * helpers, so it is sufficient if running_helpers turns out to |
| @@ -314,7 +331,9 @@ int usermodehelper_disable(void) | |||
| 314 | if (retval) | 331 | if (retval) |
| 315 | return 0; | 332 | return 0; |
| 316 | 333 | ||
| 334 | down_write(&umhelper_sem); | ||
| 317 | usermodehelper_disabled = 0; | 335 | usermodehelper_disabled = 0; |
| 336 | up_write(&umhelper_sem); | ||
| 318 | return -EAGAIN; | 337 | return -EAGAIN; |
| 319 | } | 338 | } |
| 320 | 339 | ||
| @@ -323,7 +342,9 @@ int usermodehelper_disable(void) | |||
| 323 | */ | 342 | */ |
| 324 | void usermodehelper_enable(void) | 343 | void usermodehelper_enable(void) |
| 325 | { | 344 | { |
| 345 | down_write(&umhelper_sem); | ||
| 326 | usermodehelper_disabled = 0; | 346 | usermodehelper_disabled = 0; |
| 347 | up_write(&umhelper_sem); | ||
| 327 | } | 348 | } |
| 328 | 349 | ||
| 329 | /** | 350 | /** |
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index e5d84644823b..95dd7212e610 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
| @@ -2198,7 +2198,7 @@ static ssize_t write_enabled_file_bool(struct file *file, | |||
| 2198 | const char __user *user_buf, size_t count, loff_t *ppos) | 2198 | const char __user *user_buf, size_t count, loff_t *ppos) |
| 2199 | { | 2199 | { |
| 2200 | char buf[32]; | 2200 | char buf[32]; |
| 2201 | int buf_size; | 2201 | size_t buf_size; |
| 2202 | 2202 | ||
| 2203 | buf_size = min(count, (sizeof(buf)-1)); | 2203 | buf_size = min(count, (sizeof(buf)-1)); |
| 2204 | if (copy_from_user(buf, user_buf, buf_size)) | 2204 | if (copy_from_user(buf, user_buf, buf_size)) |
diff --git a/kernel/kthread.c b/kernel/kthread.c index b6d216a92639..3d3de633702e 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c | |||
| @@ -59,6 +59,31 @@ int kthread_should_stop(void) | |||
| 59 | EXPORT_SYMBOL(kthread_should_stop); | 59 | EXPORT_SYMBOL(kthread_should_stop); |
| 60 | 60 | ||
| 61 | /** | 61 | /** |
| 62 | * kthread_freezable_should_stop - should this freezable kthread return now? | ||
| 63 | * @was_frozen: optional out parameter, indicates whether %current was frozen | ||
| 64 | * | ||
| 65 | * kthread_should_stop() for freezable kthreads, which will enter | ||
| 66 | * refrigerator if necessary. This function is safe from kthread_stop() / | ||
| 67 | * freezer deadlock and freezable kthreads should use this function instead | ||
| 68 | * of calling try_to_freeze() directly. | ||
| 69 | */ | ||
| 70 | bool kthread_freezable_should_stop(bool *was_frozen) | ||
| 71 | { | ||
| 72 | bool frozen = false; | ||
| 73 | |||
| 74 | might_sleep(); | ||
| 75 | |||
| 76 | if (unlikely(freezing(current))) | ||
| 77 | frozen = __refrigerator(true); | ||
| 78 | |||
| 79 | if (was_frozen) | ||
| 80 | *was_frozen = frozen; | ||
| 81 | |||
| 82 | return kthread_should_stop(); | ||
| 83 | } | ||
| 84 | EXPORT_SYMBOL_GPL(kthread_freezable_should_stop); | ||
| 85 | |||
| 86 | /** | ||
| 62 | * kthread_data - return data value specified on kthread creation | 87 | * kthread_data - return data value specified on kthread creation |
| 63 | * @task: kthread task in question | 88 | * @task: kthread task in question |
| 64 | * | 89 | * |
| @@ -257,7 +282,7 @@ int kthreadd(void *unused) | |||
| 257 | set_cpus_allowed_ptr(tsk, cpu_all_mask); | 282 | set_cpus_allowed_ptr(tsk, cpu_all_mask); |
| 258 | set_mems_allowed(node_states[N_HIGH_MEMORY]); | 283 | set_mems_allowed(node_states[N_HIGH_MEMORY]); |
| 259 | 284 | ||
| 260 | current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG; | 285 | current->flags |= PF_NOFREEZE; |
| 261 | 286 | ||
| 262 | for (;;) { | 287 | for (;;) { |
| 263 | set_current_state(TASK_INTERRUPTIBLE); | 288 | set_current_state(TASK_INTERRUPTIBLE); |
diff --git a/kernel/module.c b/kernel/module.c index 178333c48d1e..2c932760fd33 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
| @@ -62,12 +62,6 @@ | |||
| 62 | #define CREATE_TRACE_POINTS | 62 | #define CREATE_TRACE_POINTS |
| 63 | #include <trace/events/module.h> | 63 | #include <trace/events/module.h> |
| 64 | 64 | ||
| 65 | #if 0 | ||
| 66 | #define DEBUGP printk | ||
| 67 | #else | ||
| 68 | #define DEBUGP(fmt , a...) | ||
| 69 | #endif | ||
| 70 | |||
| 71 | #ifndef ARCH_SHF_SMALL | 65 | #ifndef ARCH_SHF_SMALL |
| 72 | #define ARCH_SHF_SMALL 0 | 66 | #define ARCH_SHF_SMALL 0 |
| 73 | #endif | 67 | #endif |
| @@ -138,7 +132,6 @@ struct load_info { | |||
| 138 | unsigned long len; | 132 | unsigned long len; |
| 139 | Elf_Shdr *sechdrs; | 133 | Elf_Shdr *sechdrs; |
| 140 | char *secstrings, *strtab; | 134 | char *secstrings, *strtab; |
| 141 | unsigned long *strmap; | ||
| 142 | unsigned long symoffs, stroffs; | 135 | unsigned long symoffs, stroffs; |
| 143 | struct _ddebug *debug; | 136 | struct _ddebug *debug; |
| 144 | unsigned int num_debug; | 137 | unsigned int num_debug; |
| @@ -410,7 +403,7 @@ const struct kernel_symbol *find_symbol(const char *name, | |||
| 410 | return fsa.sym; | 403 | return fsa.sym; |
| 411 | } | 404 | } |
| 412 | 405 | ||
| 413 | DEBUGP("Failed to find symbol %s\n", name); | 406 | pr_debug("Failed to find symbol %s\n", name); |
| 414 | return NULL; | 407 | return NULL; |
| 415 | } | 408 | } |
| 416 | EXPORT_SYMBOL_GPL(find_symbol); | 409 | EXPORT_SYMBOL_GPL(find_symbol); |
| @@ -600,11 +593,11 @@ static int already_uses(struct module *a, struct module *b) | |||
| 600 | 593 | ||
| 601 | list_for_each_entry(use, &b->source_list, source_list) { | 594 | list_for_each_entry(use, &b->source_list, source_list) { |
| 602 | if (use->source == a) { | 595 | if (use->source == a) { |
| 603 | DEBUGP("%s uses %s!\n", a->name, b->name); | 596 | pr_debug("%s uses %s!\n", a->name, b->name); |
| 604 | return 1; | 597 | return 1; |
| 605 | } | 598 | } |
| 606 | } | 599 | } |
| 607 | DEBUGP("%s does not use %s!\n", a->name, b->name); | 600 | pr_debug("%s does not use %s!\n", a->name, b->name); |
| 608 | return 0; | 601 | return 0; |
| 609 | } | 602 | } |
| 610 | 603 | ||
| @@ -619,7 +612,7 @@ static int add_module_usage(struct module *a, struct module *b) | |||
| 619 | { | 612 | { |
| 620 | struct module_use *use; | 613 | struct module_use *use; |
| 621 | 614 | ||
| 622 | DEBUGP("Allocating new usage for %s.\n", a->name); | 615 | pr_debug("Allocating new usage for %s.\n", a->name); |
| 623 | use = kmalloc(sizeof(*use), GFP_ATOMIC); | 616 | use = kmalloc(sizeof(*use), GFP_ATOMIC); |
| 624 | if (!use) { | 617 | if (!use) { |
| 625 | printk(KERN_WARNING "%s: out of memory loading\n", a->name); | 618 | printk(KERN_WARNING "%s: out of memory loading\n", a->name); |
| @@ -663,7 +656,7 @@ static void module_unload_free(struct module *mod) | |||
| 663 | mutex_lock(&module_mutex); | 656 | mutex_lock(&module_mutex); |
| 664 | list_for_each_entry_safe(use, tmp, &mod->target_list, target_list) { | 657 | list_for_each_entry_safe(use, tmp, &mod->target_list, target_list) { |
| 665 | struct module *i = use->target; | 658 | struct module *i = use->target; |
| 666 | DEBUGP("%s unusing %s\n", mod->name, i->name); | 659 | pr_debug("%s unusing %s\n", mod->name, i->name); |
| 667 | module_put(i); | 660 | module_put(i); |
| 668 | list_del(&use->source_list); | 661 | list_del(&use->source_list); |
| 669 | list_del(&use->target_list); | 662 | list_del(&use->target_list); |
| @@ -726,9 +719,9 @@ static int try_stop_module(struct module *mod, int flags, int *forced) | |||
| 726 | } | 719 | } |
| 727 | } | 720 | } |
| 728 | 721 | ||
| 729 | unsigned int module_refcount(struct module *mod) | 722 | unsigned long module_refcount(struct module *mod) |
| 730 | { | 723 | { |
| 731 | unsigned int incs = 0, decs = 0; | 724 | unsigned long incs = 0, decs = 0; |
| 732 | int cpu; | 725 | int cpu; |
| 733 | 726 | ||
| 734 | for_each_possible_cpu(cpu) | 727 | for_each_possible_cpu(cpu) |
| @@ -761,7 +754,7 @@ static void wait_for_zero_refcount(struct module *mod) | |||
| 761 | /* Since we might sleep for some time, release the mutex first */ | 754 | /* Since we might sleep for some time, release the mutex first */ |
| 762 | mutex_unlock(&module_mutex); | 755 | mutex_unlock(&module_mutex); |
| 763 | for (;;) { | 756 | for (;;) { |
| 764 | DEBUGP("Looking at refcount...\n"); | 757 | pr_debug("Looking at refcount...\n"); |
| 765 | set_current_state(TASK_UNINTERRUPTIBLE); | 758 | set_current_state(TASK_UNINTERRUPTIBLE); |
| 766 | if (module_refcount(mod) == 0) | 759 | if (module_refcount(mod) == 0) |
| 767 | break; | 760 | break; |
| @@ -804,7 +797,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, | |||
| 804 | if (mod->state != MODULE_STATE_LIVE) { | 797 | if (mod->state != MODULE_STATE_LIVE) { |
| 805 | /* FIXME: if (force), slam module count and wake up | 798 | /* FIXME: if (force), slam module count and wake up |
| 806 | waiter --RR */ | 799 | waiter --RR */ |
| 807 | DEBUGP("%s already dying\n", mod->name); | 800 | pr_debug("%s already dying\n", mod->name); |
| 808 | ret = -EBUSY; | 801 | ret = -EBUSY; |
| 809 | goto out; | 802 | goto out; |
| 810 | } | 803 | } |
| @@ -854,7 +847,7 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod) | |||
| 854 | struct module_use *use; | 847 | struct module_use *use; |
| 855 | int printed_something = 0; | 848 | int printed_something = 0; |
| 856 | 849 | ||
| 857 | seq_printf(m, " %u ", module_refcount(mod)); | 850 | seq_printf(m, " %lu ", module_refcount(mod)); |
| 858 | 851 | ||
| 859 | /* Always include a trailing , so userspace can differentiate | 852 | /* Always include a trailing , so userspace can differentiate |
| 860 | between this and the old multi-field proc format. */ | 853 | between this and the old multi-field proc format. */ |
| @@ -904,13 +897,11 @@ EXPORT_SYMBOL_GPL(symbol_put_addr); | |||
| 904 | static ssize_t show_refcnt(struct module_attribute *mattr, | 897 | static ssize_t show_refcnt(struct module_attribute *mattr, |
| 905 | struct module_kobject *mk, char *buffer) | 898 | struct module_kobject *mk, char *buffer) |
| 906 | { | 899 | { |
| 907 | return sprintf(buffer, "%u\n", module_refcount(mk->mod)); | 900 | return sprintf(buffer, "%lu\n", module_refcount(mk->mod)); |
| 908 | } | 901 | } |
| 909 | 902 | ||
| 910 | static struct module_attribute refcnt = { | 903 | static struct module_attribute modinfo_refcnt = |
| 911 | .attr = { .name = "refcnt", .mode = 0444 }, | 904 | __ATTR(refcnt, 0444, show_refcnt, NULL); |
| 912 | .show = show_refcnt, | ||
| 913 | }; | ||
| 914 | 905 | ||
| 915 | void module_put(struct module *module) | 906 | void module_put(struct module *module) |
| 916 | { | 907 | { |
| @@ -951,6 +942,26 @@ static inline int module_unload_init(struct module *mod) | |||
| 951 | } | 942 | } |
| 952 | #endif /* CONFIG_MODULE_UNLOAD */ | 943 | #endif /* CONFIG_MODULE_UNLOAD */ |
| 953 | 944 | ||
| 945 | static size_t module_flags_taint(struct module *mod, char *buf) | ||
| 946 | { | ||
| 947 | size_t l = 0; | ||
| 948 | |||
| 949 | if (mod->taints & (1 << TAINT_PROPRIETARY_MODULE)) | ||
| 950 | buf[l++] = 'P'; | ||
| 951 | if (mod->taints & (1 << TAINT_OOT_MODULE)) | ||
| 952 | buf[l++] = 'O'; | ||
| 953 | if (mod->taints & (1 << TAINT_FORCED_MODULE)) | ||
| 954 | buf[l++] = 'F'; | ||
| 955 | if (mod->taints & (1 << TAINT_CRAP)) | ||
| 956 | buf[l++] = 'C'; | ||
| 957 | /* | ||
| 958 | * TAINT_FORCED_RMMOD: could be added. | ||
| 959 | * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't | ||
| 960 | * apply to modules. | ||
| 961 | */ | ||
| 962 | return l; | ||
| 963 | } | ||
| 964 | |||
| 954 | static ssize_t show_initstate(struct module_attribute *mattr, | 965 | static ssize_t show_initstate(struct module_attribute *mattr, |
| 955 | struct module_kobject *mk, char *buffer) | 966 | struct module_kobject *mk, char *buffer) |
| 956 | { | 967 | { |
| @@ -970,10 +981,8 @@ static ssize_t show_initstate(struct module_attribute *mattr, | |||
| 970 | return sprintf(buffer, "%s\n", state); | 981 | return sprintf(buffer, "%s\n", state); |
| 971 | } | 982 | } |
| 972 | 983 | ||
| 973 | static struct module_attribute initstate = { | 984 | static struct module_attribute modinfo_initstate = |
| 974 | .attr = { .name = "initstate", .mode = 0444 }, | 985 | __ATTR(initstate, 0444, show_initstate, NULL); |
| 975 | .show = show_initstate, | ||
| 976 | }; | ||
| 977 | 986 | ||
| 978 | static ssize_t store_uevent(struct module_attribute *mattr, | 987 | static ssize_t store_uevent(struct module_attribute *mattr, |
| 979 | struct module_kobject *mk, | 988 | struct module_kobject *mk, |
| @@ -986,18 +995,50 @@ static ssize_t store_uevent(struct module_attribute *mattr, | |||
| 986 | return count; | 995 | return count; |
| 987 | } | 996 | } |
| 988 | 997 | ||
| 989 | struct module_attribute module_uevent = { | 998 | struct module_attribute module_uevent = |
| 990 | .attr = { .name = "uevent", .mode = 0200 }, | 999 | __ATTR(uevent, 0200, NULL, store_uevent); |
| 991 | .store = store_uevent, | 1000 | |
| 992 | }; | 1001 | static ssize_t show_coresize(struct module_attribute *mattr, |
| 1002 | struct module_kobject *mk, char *buffer) | ||
| 1003 | { | ||
| 1004 | return sprintf(buffer, "%u\n", mk->mod->core_size); | ||
| 1005 | } | ||
| 1006 | |||
| 1007 | static struct module_attribute modinfo_coresize = | ||
| 1008 | __ATTR(coresize, 0444, show_coresize, NULL); | ||
| 1009 | |||
| 1010 | static ssize_t show_initsize(struct module_attribute *mattr, | ||
| 1011 | struct module_kobject *mk, char *buffer) | ||
| 1012 | { | ||
| 1013 | return sprintf(buffer, "%u\n", mk->mod->init_size); | ||
| 1014 | } | ||
| 1015 | |||
| 1016 | static struct module_attribute modinfo_initsize = | ||
| 1017 | __ATTR(initsize, 0444, show_initsize, NULL); | ||
| 1018 | |||
| 1019 | static ssize_t show_taint(struct module_attribute *mattr, | ||
| 1020 | struct module_kobject *mk, char *buffer) | ||
| 1021 | { | ||
| 1022 | size_t l; | ||
| 1023 | |||
| 1024 | l = module_flags_taint(mk->mod, buffer); | ||
| 1025 | buffer[l++] = '\n'; | ||
| 1026 | return l; | ||
| 1027 | } | ||
| 1028 | |||
| 1029 | static struct module_attribute modinfo_taint = | ||
| 1030 | __ATTR(taint, 0444, show_taint, NULL); | ||
| 993 | 1031 | ||
| 994 | static struct module_attribute *modinfo_attrs[] = { | 1032 | static struct module_attribute *modinfo_attrs[] = { |
| 1033 | &module_uevent, | ||
| 995 | &modinfo_version, | 1034 | &modinfo_version, |
| 996 | &modinfo_srcversion, | 1035 | &modinfo_srcversion, |
| 997 | &initstate, | 1036 | &modinfo_initstate, |
| 998 | &module_uevent, | 1037 | &modinfo_coresize, |
| 1038 | &modinfo_initsize, | ||
| 1039 | &modinfo_taint, | ||
| 999 | #ifdef CONFIG_MODULE_UNLOAD | 1040 | #ifdef CONFIG_MODULE_UNLOAD |
| 1000 | &refcnt, | 1041 | &modinfo_refcnt, |
| 1001 | #endif | 1042 | #endif |
| 1002 | NULL, | 1043 | NULL, |
| 1003 | }; | 1044 | }; |
| @@ -1057,7 +1098,7 @@ static int check_version(Elf_Shdr *sechdrs, | |||
| 1057 | 1098 | ||
| 1058 | if (versions[i].crc == maybe_relocated(*crc, crc_owner)) | 1099 | if (versions[i].crc == maybe_relocated(*crc, crc_owner)) |
| 1059 | return 1; | 1100 | return 1; |
| 1060 | DEBUGP("Found checksum %lX vs module %lX\n", | 1101 | pr_debug("Found checksum %lX vs module %lX\n", |
| 1061 | maybe_relocated(*crc, crc_owner), versions[i].crc); | 1102 | maybe_relocated(*crc, crc_owner), versions[i].crc); |
| 1062 | goto bad_version; | 1103 | goto bad_version; |
| 1063 | } | 1104 | } |
| @@ -1834,7 +1875,7 @@ static int simplify_symbols(struct module *mod, const struct load_info *info) | |||
| 1834 | case SHN_COMMON: | 1875 | case SHN_COMMON: |
| 1835 | /* We compiled with -fno-common. These are not | 1876 | /* We compiled with -fno-common. These are not |
| 1836 | supposed to happen. */ | 1877 | supposed to happen. */ |
| 1837 | DEBUGP("Common symbol: %s\n", name); | 1878 | pr_debug("Common symbol: %s\n", name); |
| 1838 | printk("%s: please compile with -fno-common\n", | 1879 | printk("%s: please compile with -fno-common\n", |
| 1839 | mod->name); | 1880 | mod->name); |
| 1840 | ret = -ENOEXEC; | 1881 | ret = -ENOEXEC; |
| @@ -1842,7 +1883,7 @@ static int simplify_symbols(struct module *mod, const struct load_info *info) | |||
| 1842 | 1883 | ||
| 1843 | case SHN_ABS: | 1884 | case SHN_ABS: |
| 1844 | /* Don't need to do anything */ | 1885 | /* Don't need to do anything */ |
| 1845 | DEBUGP("Absolute symbol: 0x%08lx\n", | 1886 | pr_debug("Absolute symbol: 0x%08lx\n", |
| 1846 | (long)sym[i].st_value); | 1887 | (long)sym[i].st_value); |
| 1847 | break; | 1888 | break; |
| 1848 | 1889 | ||
| @@ -1966,7 +2007,7 @@ static void layout_sections(struct module *mod, struct load_info *info) | |||
| 1966 | for (i = 0; i < info->hdr->e_shnum; i++) | 2007 | for (i = 0; i < info->hdr->e_shnum; i++) |
| 1967 | info->sechdrs[i].sh_entsize = ~0UL; | 2008 | info->sechdrs[i].sh_entsize = ~0UL; |
| 1968 | 2009 | ||
| 1969 | DEBUGP("Core section allocation order:\n"); | 2010 | pr_debug("Core section allocation order:\n"); |
| 1970 | for (m = 0; m < ARRAY_SIZE(masks); ++m) { | 2011 | for (m = 0; m < ARRAY_SIZE(masks); ++m) { |
| 1971 | for (i = 0; i < info->hdr->e_shnum; ++i) { | 2012 | for (i = 0; i < info->hdr->e_shnum; ++i) { |
| 1972 | Elf_Shdr *s = &info->sechdrs[i]; | 2013 | Elf_Shdr *s = &info->sechdrs[i]; |
| @@ -1978,7 +2019,7 @@ static void layout_sections(struct module *mod, struct load_info *info) | |||
| 1978 | || strstarts(sname, ".init")) | 2019 | || strstarts(sname, ".init")) |
| 1979 | continue; | 2020 | continue; |
| 1980 | s->sh_entsize = get_offset(mod, &mod->core_size, s, i); | 2021 | s->sh_entsize = get_offset(mod, &mod->core_size, s, i); |
| 1981 | DEBUGP("\t%s\n", name); | 2022 | pr_debug("\t%s\n", sname); |
| 1982 | } | 2023 | } |
| 1983 | switch (m) { | 2024 | switch (m) { |
| 1984 | case 0: /* executable */ | 2025 | case 0: /* executable */ |
| @@ -1995,7 +2036,7 @@ static void layout_sections(struct module *mod, struct load_info *info) | |||
| 1995 | } | 2036 | } |
| 1996 | } | 2037 | } |
| 1997 | 2038 | ||
| 1998 | DEBUGP("Init section allocation order:\n"); | 2039 | pr_debug("Init section allocation order:\n"); |
| 1999 | for (m = 0; m < ARRAY_SIZE(masks); ++m) { | 2040 | for (m = 0; m < ARRAY_SIZE(masks); ++m) { |
| 2000 | for (i = 0; i < info->hdr->e_shnum; ++i) { | 2041 | for (i = 0; i < info->hdr->e_shnum; ++i) { |
| 2001 | Elf_Shdr *s = &info->sechdrs[i]; | 2042 | Elf_Shdr *s = &info->sechdrs[i]; |
| @@ -2008,7 +2049,7 @@ static void layout_sections(struct module *mod, struct load_info *info) | |||
| 2008 | continue; | 2049 | continue; |
| 2009 | s->sh_entsize = (get_offset(mod, &mod->init_size, s, i) | 2050 | s->sh_entsize = (get_offset(mod, &mod->init_size, s, i) |
| 2010 | | INIT_OFFSET_MASK); | 2051 | | INIT_OFFSET_MASK); |
| 2011 | DEBUGP("\t%s\n", sname); | 2052 | pr_debug("\t%s\n", sname); |
| 2012 | } | 2053 | } |
| 2013 | switch (m) { | 2054 | switch (m) { |
| 2014 | case 0: /* executable */ | 2055 | case 0: /* executable */ |
| @@ -2178,45 +2219,46 @@ static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs, | |||
| 2178 | return true; | 2219 | return true; |
| 2179 | } | 2220 | } |
| 2180 | 2221 | ||
| 2222 | /* | ||
| 2223 | * We only allocate and copy the strings needed by the parts of symtab | ||
| 2224 | * we keep. This is simple, but has the effect of making multiple | ||
| 2225 | * copies of duplicates. We could be more sophisticated, see | ||
| 2226 | * linux-kernel thread starting with | ||
| 2227 | * <73defb5e4bca04a6431392cc341112b1@localhost>. | ||
| 2228 | */ | ||
| 2181 | static void layout_symtab(struct module *mod, struct load_info *info) | 2229 | static void layout_symtab(struct module *mod, struct load_info *info) |
| 2182 | { | 2230 | { |
| 2183 | Elf_Shdr *symsect = info->sechdrs + info->index.sym; | 2231 | Elf_Shdr *symsect = info->sechdrs + info->index.sym; |
| 2184 | Elf_Shdr *strsect = info->sechdrs + info->index.str; | 2232 | Elf_Shdr *strsect = info->sechdrs + info->index.str; |
| 2185 | const Elf_Sym *src; | 2233 | const Elf_Sym *src; |
| 2186 | unsigned int i, nsrc, ndst; | 2234 | unsigned int i, nsrc, ndst, strtab_size; |
| 2187 | 2235 | ||
| 2188 | /* Put symbol section at end of init part of module. */ | 2236 | /* Put symbol section at end of init part of module. */ |
| 2189 | symsect->sh_flags |= SHF_ALLOC; | 2237 | symsect->sh_flags |= SHF_ALLOC; |
| 2190 | symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect, | 2238 | symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect, |
| 2191 | info->index.sym) | INIT_OFFSET_MASK; | 2239 | info->index.sym) | INIT_OFFSET_MASK; |
| 2192 | DEBUGP("\t%s\n", info->secstrings + symsect->sh_name); | 2240 | pr_debug("\t%s\n", info->secstrings + symsect->sh_name); |
| 2193 | 2241 | ||
| 2194 | src = (void *)info->hdr + symsect->sh_offset; | 2242 | src = (void *)info->hdr + symsect->sh_offset; |
| 2195 | nsrc = symsect->sh_size / sizeof(*src); | 2243 | nsrc = symsect->sh_size / sizeof(*src); |
| 2196 | for (ndst = i = 1; i < nsrc; ++i, ++src) | ||
| 2197 | if (is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) { | ||
| 2198 | unsigned int j = src->st_name; | ||
| 2199 | 2244 | ||
| 2200 | while (!__test_and_set_bit(j, info->strmap) | 2245 | /* Compute total space required for the core symbols' strtab. */ |
| 2201 | && info->strtab[j]) | 2246 | for (ndst = i = strtab_size = 1; i < nsrc; ++i, ++src) |
| 2202 | ++j; | 2247 | if (is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) { |
| 2203 | ++ndst; | 2248 | strtab_size += strlen(&info->strtab[src->st_name]) + 1; |
| 2249 | ndst++; | ||
| 2204 | } | 2250 | } |
| 2205 | 2251 | ||
| 2206 | /* Append room for core symbols at end of core part. */ | 2252 | /* Append room for core symbols at end of core part. */ |
| 2207 | info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); | 2253 | info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); |
| 2208 | mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym); | 2254 | info->stroffs = mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym); |
| 2255 | mod->core_size += strtab_size; | ||
| 2209 | 2256 | ||
| 2210 | /* Put string table section at end of init part of module. */ | 2257 | /* Put string table section at end of init part of module. */ |
| 2211 | strsect->sh_flags |= SHF_ALLOC; | 2258 | strsect->sh_flags |= SHF_ALLOC; |
| 2212 | strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect, | 2259 | strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect, |
| 2213 | info->index.str) | INIT_OFFSET_MASK; | 2260 | info->index.str) | INIT_OFFSET_MASK; |
| 2214 | DEBUGP("\t%s\n", info->secstrings + strsect->sh_name); | 2261 | pr_debug("\t%s\n", info->secstrings + strsect->sh_name); |
| 2215 | |||
| 2216 | /* Append room for core symbols' strings at end of core part. */ | ||
| 2217 | info->stroffs = mod->core_size; | ||
| 2218 | __set_bit(0, info->strmap); | ||
| 2219 | mod->core_size += bitmap_weight(info->strmap, strsect->sh_size); | ||
| 2220 | } | 2262 | } |
| 2221 | 2263 | ||
| 2222 | static void add_kallsyms(struct module *mod, const struct load_info *info) | 2264 | static void add_kallsyms(struct module *mod, const struct load_info *info) |
| @@ -2237,22 +2279,19 @@ static void add_kallsyms(struct module *mod, const struct load_info *info) | |||
| 2237 | mod->symtab[i].st_info = elf_type(&mod->symtab[i], info); | 2279 | mod->symtab[i].st_info = elf_type(&mod->symtab[i], info); |
| 2238 | 2280 | ||
| 2239 | mod->core_symtab = dst = mod->module_core + info->symoffs; | 2281 | mod->core_symtab = dst = mod->module_core + info->symoffs; |
| 2282 | mod->core_strtab = s = mod->module_core + info->stroffs; | ||
| 2240 | src = mod->symtab; | 2283 | src = mod->symtab; |
| 2241 | *dst = *src; | 2284 | *dst = *src; |
| 2285 | *s++ = 0; | ||
| 2242 | for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) { | 2286 | for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) { |
| 2243 | if (!is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) | 2287 | if (!is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) |
| 2244 | continue; | 2288 | continue; |
| 2289 | |||
| 2245 | dst[ndst] = *src; | 2290 | dst[ndst] = *src; |
| 2246 | dst[ndst].st_name = bitmap_weight(info->strmap, | 2291 | dst[ndst++].st_name = s - mod->core_strtab; |
| 2247 | dst[ndst].st_name); | 2292 | s += strlcpy(s, &mod->strtab[src->st_name], KSYM_NAME_LEN) + 1; |
| 2248 | ++ndst; | ||
| 2249 | } | 2293 | } |
| 2250 | mod->core_num_syms = ndst; | 2294 | mod->core_num_syms = ndst; |
| 2251 | |||
| 2252 | mod->core_strtab = s = mod->module_core + info->stroffs; | ||
| 2253 | for (*s = 0, i = 1; i < info->sechdrs[info->index.str].sh_size; ++i) | ||
| 2254 | if (test_bit(i, info->strmap)) | ||
| 2255 | *++s = mod->strtab[i]; | ||
| 2256 | } | 2295 | } |
| 2257 | #else | 2296 | #else |
| 2258 | static inline void layout_symtab(struct module *mod, struct load_info *info) | 2297 | static inline void layout_symtab(struct module *mod, struct load_info *info) |
| @@ -2621,7 +2660,7 @@ static int move_module(struct module *mod, struct load_info *info) | |||
| 2621 | mod->module_init = ptr; | 2660 | mod->module_init = ptr; |
| 2622 | 2661 | ||
| 2623 | /* Transfer each section which specifies SHF_ALLOC */ | 2662 | /* Transfer each section which specifies SHF_ALLOC */ |
| 2624 | DEBUGP("final section addresses:\n"); | 2663 | pr_debug("final section addresses:\n"); |
| 2625 | for (i = 0; i < info->hdr->e_shnum; i++) { | 2664 | for (i = 0; i < info->hdr->e_shnum; i++) { |
| 2626 | void *dest; | 2665 | void *dest; |
| 2627 | Elf_Shdr *shdr = &info->sechdrs[i]; | 2666 | Elf_Shdr *shdr = &info->sechdrs[i]; |
| @@ -2639,8 +2678,8 @@ static int move_module(struct module *mod, struct load_info *info) | |||
| 2639 | memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size); | 2678 | memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size); |
| 2640 | /* Update sh_addr to point to copy in image. */ | 2679 | /* Update sh_addr to point to copy in image. */ |
| 2641 | shdr->sh_addr = (unsigned long)dest; | 2680 | shdr->sh_addr = (unsigned long)dest; |
| 2642 | DEBUGP("\t0x%lx %s\n", | 2681 | pr_debug("\t0x%lx %s\n", |
| 2643 | shdr->sh_addr, info->secstrings + shdr->sh_name); | 2682 | (long)shdr->sh_addr, info->secstrings + shdr->sh_name); |
| 2644 | } | 2683 | } |
| 2645 | 2684 | ||
| 2646 | return 0; | 2685 | return 0; |
| @@ -2742,27 +2781,18 @@ static struct module *layout_and_allocate(struct load_info *info) | |||
| 2742 | this is done generically; there doesn't appear to be any | 2781 | this is done generically; there doesn't appear to be any |
| 2743 | special cases for the architectures. */ | 2782 | special cases for the architectures. */ |
| 2744 | layout_sections(mod, info); | 2783 | layout_sections(mod, info); |
| 2745 | |||
| 2746 | info->strmap = kzalloc(BITS_TO_LONGS(info->sechdrs[info->index.str].sh_size) | ||
| 2747 | * sizeof(long), GFP_KERNEL); | ||
| 2748 | if (!info->strmap) { | ||
| 2749 | err = -ENOMEM; | ||
| 2750 | goto free_percpu; | ||
| 2751 | } | ||
| 2752 | layout_symtab(mod, info); | 2784 | layout_symtab(mod, info); |
| 2753 | 2785 | ||
| 2754 | /* Allocate and move to the final place */ | 2786 | /* Allocate and move to the final place */ |
| 2755 | err = move_module(mod, info); | 2787 | err = move_module(mod, info); |
| 2756 | if (err) | 2788 | if (err) |
| 2757 | goto free_strmap; | 2789 | goto free_percpu; |
| 2758 | 2790 | ||
| 2759 | /* Module has been copied to its final place now: return it. */ | 2791 | /* Module has been copied to its final place now: return it. */ |
| 2760 | mod = (void *)info->sechdrs[info->index.mod].sh_addr; | 2792 | mod = (void *)info->sechdrs[info->index.mod].sh_addr; |
| 2761 | kmemleak_load_module(mod, info); | 2793 | kmemleak_load_module(mod, info); |
| 2762 | return mod; | 2794 | return mod; |
| 2763 | 2795 | ||
| 2764 | free_strmap: | ||
| 2765 | kfree(info->strmap); | ||
| 2766 | free_percpu: | 2796 | free_percpu: |
| 2767 | percpu_modfree(mod); | 2797 | percpu_modfree(mod); |
| 2768 | out: | 2798 | out: |
| @@ -2772,7 +2802,6 @@ out: | |||
| 2772 | /* mod is no longer valid after this! */ | 2802 | /* mod is no longer valid after this! */ |
| 2773 | static void module_deallocate(struct module *mod, struct load_info *info) | 2803 | static void module_deallocate(struct module *mod, struct load_info *info) |
| 2774 | { | 2804 | { |
| 2775 | kfree(info->strmap); | ||
| 2776 | percpu_modfree(mod); | 2805 | percpu_modfree(mod); |
| 2777 | module_free(mod, mod->module_init); | 2806 | module_free(mod, mod->module_init); |
| 2778 | module_free(mod, mod->module_core); | 2807 | module_free(mod, mod->module_core); |
| @@ -2811,7 +2840,7 @@ static struct module *load_module(void __user *umod, | |||
| 2811 | struct module *mod; | 2840 | struct module *mod; |
| 2812 | long err; | 2841 | long err; |
| 2813 | 2842 | ||
| 2814 | DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", | 2843 | pr_debug("load_module: umod=%p, len=%lu, uargs=%p\n", |
| 2815 | umod, len, uargs); | 2844 | umod, len, uargs); |
| 2816 | 2845 | ||
| 2817 | /* Copy in the blobs from userspace, check they are vaguely sane. */ | 2846 | /* Copy in the blobs from userspace, check they are vaguely sane. */ |
| @@ -2902,8 +2931,7 @@ static struct module *load_module(void __user *umod, | |||
| 2902 | if (err < 0) | 2931 | if (err < 0) |
| 2903 | goto unlink; | 2932 | goto unlink; |
| 2904 | 2933 | ||
| 2905 | /* Get rid of temporary copy and strmap. */ | 2934 | /* Get rid of temporary copy. */ |
| 2906 | kfree(info.strmap); | ||
| 2907 | free_copy(&info); | 2935 | free_copy(&info); |
| 2908 | 2936 | ||
| 2909 | /* Done! */ | 2937 | /* Done! */ |
| @@ -3256,20 +3284,7 @@ static char *module_flags(struct module *mod, char *buf) | |||
| 3256 | mod->state == MODULE_STATE_GOING || | 3284 | mod->state == MODULE_STATE_GOING || |
| 3257 | mod->state == MODULE_STATE_COMING) { | 3285 | mod->state == MODULE_STATE_COMING) { |
| 3258 | buf[bx++] = '('; | 3286 | buf[bx++] = '('; |
| 3259 | if (mod->taints & (1 << TAINT_PROPRIETARY_MODULE)) | 3287 | bx += module_flags_taint(mod, buf + bx); |
| 3260 | buf[bx++] = 'P'; | ||
| 3261 | else if (mod->taints & (1 << TAINT_OOT_MODULE)) | ||
| 3262 | buf[bx++] = 'O'; | ||
| 3263 | if (mod->taints & (1 << TAINT_FORCED_MODULE)) | ||
| 3264 | buf[bx++] = 'F'; | ||
| 3265 | if (mod->taints & (1 << TAINT_CRAP)) | ||
| 3266 | buf[bx++] = 'C'; | ||
| 3267 | /* | ||
| 3268 | * TAINT_FORCED_RMMOD: could be added. | ||
| 3269 | * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't | ||
| 3270 | * apply to modules. | ||
| 3271 | */ | ||
| 3272 | |||
| 3273 | /* Show a - for module-is-being-unloaded */ | 3288 | /* Show a - for module-is-being-unloaded */ |
| 3274 | if (mod->state == MODULE_STATE_GOING) | 3289 | if (mod->state == MODULE_STATE_GOING) |
| 3275 | buf[bx++] = '-'; | 3290 | buf[bx++] = '-'; |
diff --git a/kernel/panic.c b/kernel/panic.c index 3458469eb7c3..80aed44e345a 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
| @@ -49,6 +49,15 @@ static long no_blink(int state) | |||
| 49 | long (*panic_blink)(int state); | 49 | long (*panic_blink)(int state); |
| 50 | EXPORT_SYMBOL(panic_blink); | 50 | EXPORT_SYMBOL(panic_blink); |
| 51 | 51 | ||
| 52 | /* | ||
| 53 | * Stop ourself in panic -- architecture code may override this | ||
| 54 | */ | ||
| 55 | void __weak panic_smp_self_stop(void) | ||
| 56 | { | ||
| 57 | while (1) | ||
| 58 | cpu_relax(); | ||
| 59 | } | ||
| 60 | |||
| 52 | /** | 61 | /** |
| 53 | * panic - halt the system | 62 | * panic - halt the system |
| 54 | * @fmt: The text string to print | 63 | * @fmt: The text string to print |
| @@ -57,8 +66,9 @@ EXPORT_SYMBOL(panic_blink); | |||
| 57 | * | 66 | * |
| 58 | * This function never returns. | 67 | * This function never returns. |
| 59 | */ | 68 | */ |
| 60 | NORET_TYPE void panic(const char * fmt, ...) | 69 | void panic(const char *fmt, ...) |
| 61 | { | 70 | { |
| 71 | static DEFINE_SPINLOCK(panic_lock); | ||
| 62 | static char buf[1024]; | 72 | static char buf[1024]; |
| 63 | va_list args; | 73 | va_list args; |
| 64 | long i, i_next = 0; | 74 | long i, i_next = 0; |
| @@ -68,8 +78,14 @@ NORET_TYPE void panic(const char * fmt, ...) | |||
| 68 | * It's possible to come here directly from a panic-assertion and | 78 | * It's possible to come here directly from a panic-assertion and |
| 69 | * not have preempt disabled. Some functions called from here want | 79 | * not have preempt disabled. Some functions called from here want |
| 70 | * preempt to be disabled. No point enabling it later though... | 80 | * preempt to be disabled. No point enabling it later though... |
| 81 | * | ||
| 82 | * Only one CPU is allowed to execute the panic code from here. For | ||
| 83 | * multiple parallel invocations of panic, all other CPUs either | ||
| 84 | * stop themself or will wait until they are stopped by the 1st CPU | ||
| 85 | * with smp_send_stop(). | ||
| 71 | */ | 86 | */ |
| 72 | preempt_disable(); | 87 | if (!spin_trylock(&panic_lock)) |
| 88 | panic_smp_self_stop(); | ||
| 73 | 89 | ||
| 74 | console_verbose(); | 90 | console_verbose(); |
| 75 | bust_spinlocks(1); | 91 | bust_spinlocks(1); |
| @@ -78,7 +94,11 @@ NORET_TYPE void panic(const char * fmt, ...) | |||
| 78 | va_end(args); | 94 | va_end(args); |
| 79 | printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf); | 95 | printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf); |
| 80 | #ifdef CONFIG_DEBUG_BUGVERBOSE | 96 | #ifdef CONFIG_DEBUG_BUGVERBOSE |
| 81 | dump_stack(); | 97 | /* |
| 98 | * Avoid nested stack-dumping if a panic occurs during oops processing | ||
| 99 | */ | ||
| 100 | if (!oops_in_progress) | ||
| 101 | dump_stack(); | ||
| 82 | #endif | 102 | #endif |
| 83 | 103 | ||
| 84 | /* | 104 | /* |
diff --git a/kernel/params.c b/kernel/params.c index 65aae11eb93f..32ee04308285 100644 --- a/kernel/params.c +++ b/kernel/params.c | |||
| @@ -25,12 +25,6 @@ | |||
| 25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
| 26 | #include <linux/ctype.h> | 26 | #include <linux/ctype.h> |
| 27 | 27 | ||
| 28 | #if 0 | ||
| 29 | #define DEBUGP printk | ||
| 30 | #else | ||
| 31 | #define DEBUGP(fmt, a...) | ||
| 32 | #endif | ||
| 33 | |||
| 34 | /* Protects all parameters, and incidentally kmalloced_param list. */ | 28 | /* Protects all parameters, and incidentally kmalloced_param list. */ |
| 35 | static DEFINE_MUTEX(param_lock); | 29 | static DEFINE_MUTEX(param_lock); |
| 36 | 30 | ||
| @@ -105,7 +99,7 @@ static int parse_one(char *param, | |||
| 105 | /* No one handled NULL, so do it here. */ | 99 | /* No one handled NULL, so do it here. */ |
| 106 | if (!val && params[i].ops->set != param_set_bool) | 100 | if (!val && params[i].ops->set != param_set_bool) |
| 107 | return -EINVAL; | 101 | return -EINVAL; |
| 108 | DEBUGP("They are equal! Calling %p\n", | 102 | pr_debug("They are equal! Calling %p\n", |
| 109 | params[i].ops->set); | 103 | params[i].ops->set); |
| 110 | mutex_lock(&param_lock); | 104 | mutex_lock(&param_lock); |
| 111 | err = params[i].ops->set(val, &params[i]); | 105 | err = params[i].ops->set(val, &params[i]); |
| @@ -115,11 +109,11 @@ static int parse_one(char *param, | |||
| 115 | } | 109 | } |
| 116 | 110 | ||
| 117 | if (handle_unknown) { | 111 | if (handle_unknown) { |
| 118 | DEBUGP("Unknown argument: calling %p\n", handle_unknown); | 112 | pr_debug("Unknown argument: calling %p\n", handle_unknown); |
| 119 | return handle_unknown(param, val); | 113 | return handle_unknown(param, val); |
| 120 | } | 114 | } |
| 121 | 115 | ||
| 122 | DEBUGP("Unknown argument `%s'\n", param); | 116 | pr_debug("Unknown argument `%s'\n", param); |
| 123 | return -ENOENT; | 117 | return -ENOENT; |
| 124 | } | 118 | } |
| 125 | 119 | ||
| @@ -184,7 +178,7 @@ int parse_args(const char *name, | |||
| 184 | { | 178 | { |
| 185 | char *param, *val; | 179 | char *param, *val; |
| 186 | 180 | ||
| 187 | DEBUGP("Parsing ARGS: %s\n", args); | 181 | pr_debug("Parsing ARGS: %s\n", args); |
| 188 | 182 | ||
| 189 | /* Chew leading spaces */ | 183 | /* Chew leading spaces */ |
| 190 | args = skip_spaces(args); | 184 | args = skip_spaces(args); |
| @@ -369,6 +363,30 @@ struct kernel_param_ops param_ops_invbool = { | |||
| 369 | }; | 363 | }; |
| 370 | EXPORT_SYMBOL(param_ops_invbool); | 364 | EXPORT_SYMBOL(param_ops_invbool); |
| 371 | 365 | ||
| 366 | int param_set_bint(const char *val, const struct kernel_param *kp) | ||
| 367 | { | ||
| 368 | struct kernel_param boolkp; | ||
| 369 | bool v; | ||
| 370 | int ret; | ||
| 371 | |||
| 372 | /* Match bool exactly, by re-using it. */ | ||
| 373 | boolkp = *kp; | ||
| 374 | boolkp.arg = &v; | ||
| 375 | boolkp.flags |= KPARAM_ISBOOL; | ||
| 376 | |||
| 377 | ret = param_set_bool(val, &boolkp); | ||
| 378 | if (ret == 0) | ||
| 379 | *(int *)kp->arg = v; | ||
| 380 | return ret; | ||
| 381 | } | ||
| 382 | EXPORT_SYMBOL(param_set_bint); | ||
| 383 | |||
| 384 | struct kernel_param_ops param_ops_bint = { | ||
| 385 | .set = param_set_bint, | ||
| 386 | .get = param_get_int, | ||
| 387 | }; | ||
| 388 | EXPORT_SYMBOL(param_ops_bint); | ||
| 389 | |||
| 372 | /* We break the rule and mangle the string. */ | 390 | /* We break the rule and mangle the string. */ |
| 373 | static int param_array(const char *name, | 391 | static int param_array(const char *name, |
| 374 | const char *val, | 392 | const char *val, |
diff --git a/kernel/pid.c b/kernel/pid.c index fa5f72227e5f..ce8e00deaccb 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
| @@ -137,7 +137,9 @@ static int pid_before(int base, int a, int b) | |||
| 137 | } | 137 | } |
| 138 | 138 | ||
| 139 | /* | 139 | /* |
| 140 | * We might be racing with someone else trying to set pid_ns->last_pid. | 140 | * We might be racing with someone else trying to set pid_ns->last_pid |
| 141 | * at the pid allocation time (there's also a sysctl for this, but racing | ||
| 142 | * with this one is OK, see comment in kernel/pid_namespace.c about it). | ||
| 141 | * We want the winner to have the "later" value, because if the | 143 | * We want the winner to have the "later" value, because if the |
| 142 | * "earlier" value prevails, then a pid may get reused immediately. | 144 | * "earlier" value prevails, then a pid may get reused immediately. |
| 143 | * | 145 | * |
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index e9c9adc84ca6..a8968396046d 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c | |||
| @@ -191,9 +191,40 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) | |||
| 191 | return; | 191 | return; |
| 192 | } | 192 | } |
| 193 | 193 | ||
| 194 | static int pid_ns_ctl_handler(struct ctl_table *table, int write, | ||
| 195 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
| 196 | { | ||
| 197 | struct ctl_table tmp = *table; | ||
| 198 | |||
| 199 | if (write && !capable(CAP_SYS_ADMIN)) | ||
| 200 | return -EPERM; | ||
| 201 | |||
| 202 | /* | ||
| 203 | * Writing directly to ns' last_pid field is OK, since this field | ||
| 204 | * is volatile in a living namespace anyway and a code writing to | ||
| 205 | * it should synchronize its usage with external means. | ||
| 206 | */ | ||
| 207 | |||
| 208 | tmp.data = &current->nsproxy->pid_ns->last_pid; | ||
| 209 | return proc_dointvec(&tmp, write, buffer, lenp, ppos); | ||
| 210 | } | ||
| 211 | |||
| 212 | static struct ctl_table pid_ns_ctl_table[] = { | ||
| 213 | { | ||
| 214 | .procname = "ns_last_pid", | ||
| 215 | .maxlen = sizeof(int), | ||
| 216 | .mode = 0666, /* permissions are checked in the handler */ | ||
| 217 | .proc_handler = pid_ns_ctl_handler, | ||
| 218 | }, | ||
| 219 | { } | ||
| 220 | }; | ||
| 221 | |||
| 222 | static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } }; | ||
| 223 | |||
| 194 | static __init int pid_namespaces_init(void) | 224 | static __init int pid_namespaces_init(void) |
| 195 | { | 225 | { |
| 196 | pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); | 226 | pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); |
| 227 | register_sysctl_paths(kern_path, pid_ns_ctl_table); | ||
| 197 | return 0; | 228 | return 0; |
| 198 | } | 229 | } |
| 199 | 230 | ||
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index a6b0503574ee..6d6d28870335 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c | |||
| @@ -43,8 +43,6 @@ int in_suspend __nosavedata; | |||
| 43 | enum { | 43 | enum { |
| 44 | HIBERNATION_INVALID, | 44 | HIBERNATION_INVALID, |
| 45 | HIBERNATION_PLATFORM, | 45 | HIBERNATION_PLATFORM, |
| 46 | HIBERNATION_TEST, | ||
| 47 | HIBERNATION_TESTPROC, | ||
| 48 | HIBERNATION_SHUTDOWN, | 46 | HIBERNATION_SHUTDOWN, |
| 49 | HIBERNATION_REBOOT, | 47 | HIBERNATION_REBOOT, |
| 50 | /* keep last */ | 48 | /* keep last */ |
| @@ -55,7 +53,7 @@ enum { | |||
| 55 | 53 | ||
| 56 | static int hibernation_mode = HIBERNATION_SHUTDOWN; | 54 | static int hibernation_mode = HIBERNATION_SHUTDOWN; |
| 57 | 55 | ||
| 58 | static bool freezer_test_done; | 56 | bool freezer_test_done; |
| 59 | 57 | ||
| 60 | static const struct platform_hibernation_ops *hibernation_ops; | 58 | static const struct platform_hibernation_ops *hibernation_ops; |
| 61 | 59 | ||
| @@ -71,14 +69,14 @@ void hibernation_set_ops(const struct platform_hibernation_ops *ops) | |||
| 71 | WARN_ON(1); | 69 | WARN_ON(1); |
| 72 | return; | 70 | return; |
| 73 | } | 71 | } |
| 74 | mutex_lock(&pm_mutex); | 72 | lock_system_sleep(); |
| 75 | hibernation_ops = ops; | 73 | hibernation_ops = ops; |
| 76 | if (ops) | 74 | if (ops) |
| 77 | hibernation_mode = HIBERNATION_PLATFORM; | 75 | hibernation_mode = HIBERNATION_PLATFORM; |
| 78 | else if (hibernation_mode == HIBERNATION_PLATFORM) | 76 | else if (hibernation_mode == HIBERNATION_PLATFORM) |
| 79 | hibernation_mode = HIBERNATION_SHUTDOWN; | 77 | hibernation_mode = HIBERNATION_SHUTDOWN; |
| 80 | 78 | ||
| 81 | mutex_unlock(&pm_mutex); | 79 | unlock_system_sleep(); |
| 82 | } | 80 | } |
| 83 | 81 | ||
| 84 | static bool entering_platform_hibernation; | 82 | static bool entering_platform_hibernation; |
| @@ -96,15 +94,6 @@ static void hibernation_debug_sleep(void) | |||
| 96 | mdelay(5000); | 94 | mdelay(5000); |
| 97 | } | 95 | } |
| 98 | 96 | ||
| 99 | static int hibernation_testmode(int mode) | ||
| 100 | { | ||
| 101 | if (hibernation_mode == mode) { | ||
| 102 | hibernation_debug_sleep(); | ||
| 103 | return 1; | ||
| 104 | } | ||
| 105 | return 0; | ||
| 106 | } | ||
| 107 | |||
| 108 | static int hibernation_test(int level) | 97 | static int hibernation_test(int level) |
| 109 | { | 98 | { |
| 110 | if (pm_test_level == level) { | 99 | if (pm_test_level == level) { |
| @@ -114,7 +103,6 @@ static int hibernation_test(int level) | |||
| 114 | return 0; | 103 | return 0; |
| 115 | } | 104 | } |
| 116 | #else /* !CONFIG_PM_DEBUG */ | 105 | #else /* !CONFIG_PM_DEBUG */ |
| 117 | static int hibernation_testmode(int mode) { return 0; } | ||
| 118 | static int hibernation_test(int level) { return 0; } | 106 | static int hibernation_test(int level) { return 0; } |
| 119 | #endif /* !CONFIG_PM_DEBUG */ | 107 | #endif /* !CONFIG_PM_DEBUG */ |
| 120 | 108 | ||
| @@ -278,8 +266,7 @@ static int create_image(int platform_mode) | |||
| 278 | goto Platform_finish; | 266 | goto Platform_finish; |
| 279 | 267 | ||
| 280 | error = disable_nonboot_cpus(); | 268 | error = disable_nonboot_cpus(); |
| 281 | if (error || hibernation_test(TEST_CPUS) | 269 | if (error || hibernation_test(TEST_CPUS)) |
| 282 | || hibernation_testmode(HIBERNATION_TEST)) | ||
| 283 | goto Enable_cpus; | 270 | goto Enable_cpus; |
| 284 | 271 | ||
| 285 | local_irq_disable(); | 272 | local_irq_disable(); |
| @@ -333,7 +320,7 @@ static int create_image(int platform_mode) | |||
| 333 | */ | 320 | */ |
| 334 | int hibernation_snapshot(int platform_mode) | 321 | int hibernation_snapshot(int platform_mode) |
| 335 | { | 322 | { |
| 336 | pm_message_t msg = PMSG_RECOVER; | 323 | pm_message_t msg; |
| 337 | int error; | 324 | int error; |
| 338 | 325 | ||
| 339 | error = platform_begin(platform_mode); | 326 | error = platform_begin(platform_mode); |
| @@ -349,8 +336,7 @@ int hibernation_snapshot(int platform_mode) | |||
| 349 | if (error) | 336 | if (error) |
| 350 | goto Cleanup; | 337 | goto Cleanup; |
| 351 | 338 | ||
| 352 | if (hibernation_test(TEST_FREEZER) || | 339 | if (hibernation_test(TEST_FREEZER)) { |
| 353 | hibernation_testmode(HIBERNATION_TESTPROC)) { | ||
| 354 | 340 | ||
| 355 | /* | 341 | /* |
| 356 | * Indicate to the caller that we are returning due to a | 342 | * Indicate to the caller that we are returning due to a |
| @@ -362,26 +348,26 @@ int hibernation_snapshot(int platform_mode) | |||
| 362 | 348 | ||
| 363 | error = dpm_prepare(PMSG_FREEZE); | 349 | error = dpm_prepare(PMSG_FREEZE); |
| 364 | if (error) { | 350 | if (error) { |
| 365 | dpm_complete(msg); | 351 | dpm_complete(PMSG_RECOVER); |
| 366 | goto Cleanup; | 352 | goto Cleanup; |
| 367 | } | 353 | } |
| 368 | 354 | ||
| 369 | suspend_console(); | 355 | suspend_console(); |
| 370 | pm_restrict_gfp_mask(); | 356 | pm_restrict_gfp_mask(); |
| 357 | |||
| 371 | error = dpm_suspend(PMSG_FREEZE); | 358 | error = dpm_suspend(PMSG_FREEZE); |
| 372 | if (error) | ||
| 373 | goto Recover_platform; | ||
| 374 | 359 | ||
| 375 | if (hibernation_test(TEST_DEVICES)) | 360 | if (error || hibernation_test(TEST_DEVICES)) |
| 376 | goto Recover_platform; | 361 | platform_recover(platform_mode); |
| 362 | else | ||
| 363 | error = create_image(platform_mode); | ||
| 377 | 364 | ||
| 378 | error = create_image(platform_mode); | ||
| 379 | /* | 365 | /* |
| 380 | * Control returns here (1) after the image has been created or the | 366 | * In the case that we call create_image() above, the control |
| 367 | * returns here (1) after the image has been created or the | ||
| 381 | * image creation has failed and (2) after a successful restore. | 368 | * image creation has failed and (2) after a successful restore. |
| 382 | */ | 369 | */ |
| 383 | 370 | ||
| 384 | Resume_devices: | ||
| 385 | /* We may need to release the preallocated image pages here. */ | 371 | /* We may need to release the preallocated image pages here. */ |
| 386 | if (error || !in_suspend) | 372 | if (error || !in_suspend) |
| 387 | swsusp_free(); | 373 | swsusp_free(); |
| @@ -399,10 +385,6 @@ int hibernation_snapshot(int platform_mode) | |||
| 399 | platform_end(platform_mode); | 385 | platform_end(platform_mode); |
| 400 | return error; | 386 | return error; |
| 401 | 387 | ||
| 402 | Recover_platform: | ||
| 403 | platform_recover(platform_mode); | ||
| 404 | goto Resume_devices; | ||
| 405 | |||
| 406 | Cleanup: | 388 | Cleanup: |
| 407 | swsusp_free(); | 389 | swsusp_free(); |
| 408 | goto Close; | 390 | goto Close; |
| @@ -590,9 +572,6 @@ int hibernation_platform_enter(void) | |||
| 590 | static void power_down(void) | 572 | static void power_down(void) |
| 591 | { | 573 | { |
| 592 | switch (hibernation_mode) { | 574 | switch (hibernation_mode) { |
| 593 | case HIBERNATION_TEST: | ||
| 594 | case HIBERNATION_TESTPROC: | ||
| 595 | break; | ||
| 596 | case HIBERNATION_REBOOT: | 575 | case HIBERNATION_REBOOT: |
| 597 | kernel_restart(NULL); | 576 | kernel_restart(NULL); |
| 598 | break; | 577 | break; |
| @@ -611,17 +590,6 @@ static void power_down(void) | |||
| 611 | while(1); | 590 | while(1); |
| 612 | } | 591 | } |
| 613 | 592 | ||
| 614 | static int prepare_processes(void) | ||
| 615 | { | ||
| 616 | int error = 0; | ||
| 617 | |||
| 618 | if (freeze_processes()) { | ||
| 619 | error = -EBUSY; | ||
| 620 | thaw_processes(); | ||
| 621 | } | ||
| 622 | return error; | ||
| 623 | } | ||
| 624 | |||
| 625 | /** | 593 | /** |
| 626 | * hibernate - Carry out system hibernation, including saving the image. | 594 | * hibernate - Carry out system hibernation, including saving the image. |
| 627 | */ | 595 | */ |
| @@ -629,7 +597,7 @@ int hibernate(void) | |||
| 629 | { | 597 | { |
| 630 | int error; | 598 | int error; |
| 631 | 599 | ||
| 632 | mutex_lock(&pm_mutex); | 600 | lock_system_sleep(); |
| 633 | /* The snapshot device should not be opened while we're running */ | 601 | /* The snapshot device should not be opened while we're running */ |
| 634 | if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { | 602 | if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { |
| 635 | error = -EBUSY; | 603 | error = -EBUSY; |
| @@ -654,7 +622,7 @@ int hibernate(void) | |||
| 654 | sys_sync(); | 622 | sys_sync(); |
| 655 | printk("done.\n"); | 623 | printk("done.\n"); |
| 656 | 624 | ||
| 657 | error = prepare_processes(); | 625 | error = freeze_processes(); |
| 658 | if (error) | 626 | if (error) |
| 659 | goto Finish; | 627 | goto Finish; |
| 660 | 628 | ||
| @@ -697,7 +665,7 @@ int hibernate(void) | |||
| 697 | pm_restore_console(); | 665 | pm_restore_console(); |
| 698 | atomic_inc(&snapshot_device_available); | 666 | atomic_inc(&snapshot_device_available); |
| 699 | Unlock: | 667 | Unlock: |
| 700 | mutex_unlock(&pm_mutex); | 668 | unlock_system_sleep(); |
| 701 | return error; | 669 | return error; |
| 702 | } | 670 | } |
| 703 | 671 | ||
| @@ -811,11 +779,13 @@ static int software_resume(void) | |||
| 811 | goto close_finish; | 779 | goto close_finish; |
| 812 | 780 | ||
| 813 | error = create_basic_memory_bitmaps(); | 781 | error = create_basic_memory_bitmaps(); |
| 814 | if (error) | 782 | if (error) { |
| 783 | usermodehelper_enable(); | ||
| 815 | goto close_finish; | 784 | goto close_finish; |
| 785 | } | ||
| 816 | 786 | ||
| 817 | pr_debug("PM: Preparing processes for restore.\n"); | 787 | pr_debug("PM: Preparing processes for restore.\n"); |
| 818 | error = prepare_processes(); | 788 | error = freeze_processes(); |
| 819 | if (error) { | 789 | if (error) { |
| 820 | swsusp_close(FMODE_READ); | 790 | swsusp_close(FMODE_READ); |
| 821 | goto Done; | 791 | goto Done; |
| @@ -855,8 +825,6 @@ static const char * const hibernation_modes[] = { | |||
| 855 | [HIBERNATION_PLATFORM] = "platform", | 825 | [HIBERNATION_PLATFORM] = "platform", |
| 856 | [HIBERNATION_SHUTDOWN] = "shutdown", | 826 | [HIBERNATION_SHUTDOWN] = "shutdown", |
| 857 | [HIBERNATION_REBOOT] = "reboot", | 827 | [HIBERNATION_REBOOT] = "reboot", |
| 858 | [HIBERNATION_TEST] = "test", | ||
| 859 | [HIBERNATION_TESTPROC] = "testproc", | ||
| 860 | }; | 828 | }; |
| 861 | 829 | ||
| 862 | /* | 830 | /* |
| @@ -865,17 +833,15 @@ static const char * const hibernation_modes[] = { | |||
| 865 | * Hibernation can be handled in several ways. There are a few different ways | 833 | * Hibernation can be handled in several ways. There are a few different ways |
| 866 | * to put the system into the sleep state: using the platform driver (e.g. ACPI | 834 | * to put the system into the sleep state: using the platform driver (e.g. ACPI |
| 867 | * or other hibernation_ops), powering it off or rebooting it (for testing | 835 | * or other hibernation_ops), powering it off or rebooting it (for testing |
| 868 | * mostly), or using one of the two available test modes. | 836 | * mostly). |
| 869 | * | 837 | * |
| 870 | * The sysfs file /sys/power/disk provides an interface for selecting the | 838 | * The sysfs file /sys/power/disk provides an interface for selecting the |
| 871 | * hibernation mode to use. Reading from this file causes the available modes | 839 | * hibernation mode to use. Reading from this file causes the available modes |
| 872 | * to be printed. There are 5 modes that can be supported: | 840 | * to be printed. There are 3 modes that can be supported: |
| 873 | * | 841 | * |
| 874 | * 'platform' | 842 | * 'platform' |
| 875 | * 'shutdown' | 843 | * 'shutdown' |
| 876 | * 'reboot' | 844 | * 'reboot' |
| 877 | * 'test' | ||
| 878 | * 'testproc' | ||
| 879 | * | 845 | * |
| 880 | * If a platform hibernation driver is in use, 'platform' will be supported | 846 | * If a platform hibernation driver is in use, 'platform' will be supported |
| 881 | * and will be used by default. Otherwise, 'shutdown' will be used by default. | 847 | * and will be used by default. Otherwise, 'shutdown' will be used by default. |
| @@ -899,8 +865,6 @@ static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr, | |||
| 899 | switch (i) { | 865 | switch (i) { |
| 900 | case HIBERNATION_SHUTDOWN: | 866 | case HIBERNATION_SHUTDOWN: |
| 901 | case HIBERNATION_REBOOT: | 867 | case HIBERNATION_REBOOT: |
| 902 | case HIBERNATION_TEST: | ||
| 903 | case HIBERNATION_TESTPROC: | ||
| 904 | break; | 868 | break; |
| 905 | case HIBERNATION_PLATFORM: | 869 | case HIBERNATION_PLATFORM: |
| 906 | if (hibernation_ops) | 870 | if (hibernation_ops) |
| @@ -929,7 +893,7 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, | |||
| 929 | p = memchr(buf, '\n', n); | 893 | p = memchr(buf, '\n', n); |
| 930 | len = p ? p - buf : n; | 894 | len = p ? p - buf : n; |
| 931 | 895 | ||
| 932 | mutex_lock(&pm_mutex); | 896 | lock_system_sleep(); |
| 933 | for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) { | 897 | for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) { |
| 934 | if (len == strlen(hibernation_modes[i]) | 898 | if (len == strlen(hibernation_modes[i]) |
| 935 | && !strncmp(buf, hibernation_modes[i], len)) { | 899 | && !strncmp(buf, hibernation_modes[i], len)) { |
| @@ -941,8 +905,6 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, | |||
| 941 | switch (mode) { | 905 | switch (mode) { |
| 942 | case HIBERNATION_SHUTDOWN: | 906 | case HIBERNATION_SHUTDOWN: |
| 943 | case HIBERNATION_REBOOT: | 907 | case HIBERNATION_REBOOT: |
| 944 | case HIBERNATION_TEST: | ||
| 945 | case HIBERNATION_TESTPROC: | ||
| 946 | hibernation_mode = mode; | 908 | hibernation_mode = mode; |
| 947 | break; | 909 | break; |
| 948 | case HIBERNATION_PLATFORM: | 910 | case HIBERNATION_PLATFORM: |
| @@ -957,7 +919,7 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, | |||
| 957 | if (!error) | 919 | if (!error) |
| 958 | pr_debug("PM: Hibernation mode set to '%s'\n", | 920 | pr_debug("PM: Hibernation mode set to '%s'\n", |
| 959 | hibernation_modes[mode]); | 921 | hibernation_modes[mode]); |
| 960 | mutex_unlock(&pm_mutex); | 922 | unlock_system_sleep(); |
| 961 | return error ? error : n; | 923 | return error ? error : n; |
| 962 | } | 924 | } |
| 963 | 925 | ||
| @@ -984,9 +946,9 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr, | |||
| 984 | if (maj != MAJOR(res) || min != MINOR(res)) | 946 | if (maj != MAJOR(res) || min != MINOR(res)) |
| 985 | goto out; | 947 | goto out; |
| 986 | 948 | ||
| 987 | mutex_lock(&pm_mutex); | 949 | lock_system_sleep(); |
| 988 | swsusp_resume_device = res; | 950 | swsusp_resume_device = res; |
| 989 | mutex_unlock(&pm_mutex); | 951 | unlock_system_sleep(); |
| 990 | printk(KERN_INFO "PM: Starting manual resume from disk\n"); | 952 | printk(KERN_INFO "PM: Starting manual resume from disk\n"); |
| 991 | noresume = 0; | 953 | noresume = 0; |
| 992 | software_resume(); | 954 | software_resume(); |
diff --git a/kernel/power/main.c b/kernel/power/main.c index 36e0f0903c32..9824b41e5a18 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c | |||
| @@ -3,7 +3,7 @@ | |||
| 3 | * | 3 | * |
| 4 | * Copyright (c) 2003 Patrick Mochel | 4 | * Copyright (c) 2003 Patrick Mochel |
| 5 | * Copyright (c) 2003 Open Source Development Lab | 5 | * Copyright (c) 2003 Open Source Development Lab |
| 6 | * | 6 | * |
| 7 | * This file is released under the GPLv2 | 7 | * This file is released under the GPLv2 |
| 8 | * | 8 | * |
| 9 | */ | 9 | */ |
| @@ -116,7 +116,7 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr, | |||
| 116 | p = memchr(buf, '\n', n); | 116 | p = memchr(buf, '\n', n); |
| 117 | len = p ? p - buf : n; | 117 | len = p ? p - buf : n; |
| 118 | 118 | ||
| 119 | mutex_lock(&pm_mutex); | 119 | lock_system_sleep(); |
| 120 | 120 | ||
| 121 | level = TEST_FIRST; | 121 | level = TEST_FIRST; |
| 122 | for (s = &pm_tests[level]; level <= TEST_MAX; s++, level++) | 122 | for (s = &pm_tests[level]; level <= TEST_MAX; s++, level++) |
| @@ -126,7 +126,7 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr, | |||
| 126 | break; | 126 | break; |
| 127 | } | 127 | } |
| 128 | 128 | ||
| 129 | mutex_unlock(&pm_mutex); | 129 | unlock_system_sleep(); |
| 130 | 130 | ||
| 131 | return error ? error : n; | 131 | return error ? error : n; |
| 132 | } | 132 | } |
| @@ -240,7 +240,7 @@ struct kobject *power_kobj; | |||
| 240 | * 'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and | 240 | * 'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and |
| 241 | * 'disk' (Suspend-to-Disk). | 241 | * 'disk' (Suspend-to-Disk). |
| 242 | * | 242 | * |
| 243 | * store() accepts one of those strings, translates it into the | 243 | * store() accepts one of those strings, translates it into the |
| 244 | * proper enumerated value, and initiates a suspend transition. | 244 | * proper enumerated value, and initiates a suspend transition. |
| 245 | */ | 245 | */ |
| 246 | static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, | 246 | static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, |
| @@ -282,7 +282,7 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, | |||
| 282 | /* First, check if we are requested to hibernate */ | 282 | /* First, check if we are requested to hibernate */ |
| 283 | if (len == 4 && !strncmp(buf, "disk", len)) { | 283 | if (len == 4 && !strncmp(buf, "disk", len)) { |
| 284 | error = hibernate(); | 284 | error = hibernate(); |
| 285 | goto Exit; | 285 | goto Exit; |
| 286 | } | 286 | } |
| 287 | 287 | ||
| 288 | #ifdef CONFIG_SUSPEND | 288 | #ifdef CONFIG_SUSPEND |
diff --git a/kernel/power/power.h b/kernel/power/power.h index 23a2db1ec442..0c4defe6d3b8 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h | |||
| @@ -50,6 +50,8 @@ static inline char *check_image_kernel(struct swsusp_info *info) | |||
| 50 | #define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT) | 50 | #define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT) |
| 51 | 51 | ||
| 52 | /* kernel/power/hibernate.c */ | 52 | /* kernel/power/hibernate.c */ |
| 53 | extern bool freezer_test_done; | ||
| 54 | |||
| 53 | extern int hibernation_snapshot(int platform_mode); | 55 | extern int hibernation_snapshot(int platform_mode); |
| 54 | extern int hibernation_restore(int platform_mode); | 56 | extern int hibernation_restore(int platform_mode); |
| 55 | extern int hibernation_platform_enter(void); | 57 | extern int hibernation_platform_enter(void); |
diff --git a/kernel/power/process.c b/kernel/power/process.c index addbbe5531bc..77274c9ba2f1 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c | |||
| @@ -22,16 +22,7 @@ | |||
| 22 | */ | 22 | */ |
| 23 | #define TIMEOUT (20 * HZ) | 23 | #define TIMEOUT (20 * HZ) |
| 24 | 24 | ||
| 25 | static inline int freezable(struct task_struct * p) | 25 | static int try_to_freeze_tasks(bool user_only) |
| 26 | { | ||
| 27 | if ((p == current) || | ||
| 28 | (p->flags & PF_NOFREEZE) || | ||
| 29 | (p->exit_state != 0)) | ||
| 30 | return 0; | ||
| 31 | return 1; | ||
| 32 | } | ||
| 33 | |||
| 34 | static int try_to_freeze_tasks(bool sig_only) | ||
| 35 | { | 26 | { |
| 36 | struct task_struct *g, *p; | 27 | struct task_struct *g, *p; |
| 37 | unsigned long end_time; | 28 | unsigned long end_time; |
| @@ -46,17 +37,14 @@ static int try_to_freeze_tasks(bool sig_only) | |||
| 46 | 37 | ||
| 47 | end_time = jiffies + TIMEOUT; | 38 | end_time = jiffies + TIMEOUT; |
| 48 | 39 | ||
| 49 | if (!sig_only) | 40 | if (!user_only) |
| 50 | freeze_workqueues_begin(); | 41 | freeze_workqueues_begin(); |
| 51 | 42 | ||
| 52 | while (true) { | 43 | while (true) { |
| 53 | todo = 0; | 44 | todo = 0; |
| 54 | read_lock(&tasklist_lock); | 45 | read_lock(&tasklist_lock); |
| 55 | do_each_thread(g, p) { | 46 | do_each_thread(g, p) { |
| 56 | if (frozen(p) || !freezable(p)) | 47 | if (p == current || !freeze_task(p)) |
| 57 | continue; | ||
| 58 | |||
| 59 | if (!freeze_task(p, sig_only)) | ||
| 60 | continue; | 48 | continue; |
| 61 | 49 | ||
| 62 | /* | 50 | /* |
| @@ -77,7 +65,7 @@ static int try_to_freeze_tasks(bool sig_only) | |||
| 77 | } while_each_thread(g, p); | 65 | } while_each_thread(g, p); |
| 78 | read_unlock(&tasklist_lock); | 66 | read_unlock(&tasklist_lock); |
| 79 | 67 | ||
| 80 | if (!sig_only) { | 68 | if (!user_only) { |
| 81 | wq_busy = freeze_workqueues_busy(); | 69 | wq_busy = freeze_workqueues_busy(); |
| 82 | todo += wq_busy; | 70 | todo += wq_busy; |
| 83 | } | 71 | } |
| @@ -103,11 +91,6 @@ static int try_to_freeze_tasks(bool sig_only) | |||
| 103 | elapsed_csecs = elapsed_csecs64; | 91 | elapsed_csecs = elapsed_csecs64; |
| 104 | 92 | ||
| 105 | if (todo) { | 93 | if (todo) { |
| 106 | /* This does not unfreeze processes that are already frozen | ||
| 107 | * (we have slightly ugly calling convention in that respect, | ||
| 108 | * and caller must call thaw_processes() if something fails), | ||
| 109 | * but it cleans up leftover PF_FREEZE requests. | ||
| 110 | */ | ||
| 111 | printk("\n"); | 94 | printk("\n"); |
| 112 | printk(KERN_ERR "Freezing of tasks %s after %d.%02d seconds " | 95 | printk(KERN_ERR "Freezing of tasks %s after %d.%02d seconds " |
| 113 | "(%d tasks refusing to freeze, wq_busy=%d):\n", | 96 | "(%d tasks refusing to freeze, wq_busy=%d):\n", |
| @@ -115,15 +98,11 @@ static int try_to_freeze_tasks(bool sig_only) | |||
| 115 | elapsed_csecs / 100, elapsed_csecs % 100, | 98 | elapsed_csecs / 100, elapsed_csecs % 100, |
| 116 | todo - wq_busy, wq_busy); | 99 | todo - wq_busy, wq_busy); |
| 117 | 100 | ||
| 118 | thaw_workqueues(); | ||
| 119 | |||
| 120 | read_lock(&tasklist_lock); | 101 | read_lock(&tasklist_lock); |
| 121 | do_each_thread(g, p) { | 102 | do_each_thread(g, p) { |
| 122 | task_lock(p); | 103 | if (!wakeup && !freezer_should_skip(p) && |
| 123 | if (!wakeup && freezing(p) && !freezer_should_skip(p)) | 104 | p != current && freezing(p) && !frozen(p)) |
| 124 | sched_show_task(p); | 105 | sched_show_task(p); |
| 125 | cancel_freezing(p); | ||
| 126 | task_unlock(p); | ||
| 127 | } while_each_thread(g, p); | 106 | } while_each_thread(g, p); |
| 128 | read_unlock(&tasklist_lock); | 107 | read_unlock(&tasklist_lock); |
| 129 | } else { | 108 | } else { |
| @@ -136,12 +115,18 @@ static int try_to_freeze_tasks(bool sig_only) | |||
| 136 | 115 | ||
| 137 | /** | 116 | /** |
| 138 | * freeze_processes - Signal user space processes to enter the refrigerator. | 117 | * freeze_processes - Signal user space processes to enter the refrigerator. |
| 118 | * | ||
| 119 | * On success, returns 0. On failure, -errno and system is fully thawed. | ||
| 139 | */ | 120 | */ |
| 140 | int freeze_processes(void) | 121 | int freeze_processes(void) |
| 141 | { | 122 | { |
| 142 | int error; | 123 | int error; |
| 143 | 124 | ||
| 125 | if (!pm_freezing) | ||
| 126 | atomic_inc(&system_freezing_cnt); | ||
| 127 | |||
| 144 | printk("Freezing user space processes ... "); | 128 | printk("Freezing user space processes ... "); |
| 129 | pm_freezing = true; | ||
| 145 | error = try_to_freeze_tasks(true); | 130 | error = try_to_freeze_tasks(true); |
| 146 | if (!error) { | 131 | if (!error) { |
| 147 | printk("done."); | 132 | printk("done."); |
| @@ -150,17 +135,22 @@ int freeze_processes(void) | |||
| 150 | printk("\n"); | 135 | printk("\n"); |
| 151 | BUG_ON(in_atomic()); | 136 | BUG_ON(in_atomic()); |
| 152 | 137 | ||
| 138 | if (error) | ||
| 139 | thaw_processes(); | ||
| 153 | return error; | 140 | return error; |
| 154 | } | 141 | } |
| 155 | 142 | ||
| 156 | /** | 143 | /** |
| 157 | * freeze_kernel_threads - Make freezable kernel threads go to the refrigerator. | 144 | * freeze_kernel_threads - Make freezable kernel threads go to the refrigerator. |
| 145 | * | ||
| 146 | * On success, returns 0. On failure, -errno and system is fully thawed. | ||
| 158 | */ | 147 | */ |
| 159 | int freeze_kernel_threads(void) | 148 | int freeze_kernel_threads(void) |
| 160 | { | 149 | { |
| 161 | int error; | 150 | int error; |
| 162 | 151 | ||
| 163 | printk("Freezing remaining freezable tasks ... "); | 152 | printk("Freezing remaining freezable tasks ... "); |
| 153 | pm_nosig_freezing = true; | ||
| 164 | error = try_to_freeze_tasks(false); | 154 | error = try_to_freeze_tasks(false); |
| 165 | if (!error) | 155 | if (!error) |
| 166 | printk("done."); | 156 | printk("done."); |
| @@ -168,37 +158,32 @@ int freeze_kernel_threads(void) | |||
| 168 | printk("\n"); | 158 | printk("\n"); |
| 169 | BUG_ON(in_atomic()); | 159 | BUG_ON(in_atomic()); |
| 170 | 160 | ||
| 161 | if (error) | ||
| 162 | thaw_processes(); | ||
| 171 | return error; | 163 | return error; |
| 172 | } | 164 | } |
| 173 | 165 | ||
| 174 | static void thaw_tasks(bool nosig_only) | 166 | void thaw_processes(void) |
| 175 | { | 167 | { |
| 176 | struct task_struct *g, *p; | 168 | struct task_struct *g, *p; |
| 177 | 169 | ||
| 178 | read_lock(&tasklist_lock); | 170 | if (pm_freezing) |
| 179 | do_each_thread(g, p) { | 171 | atomic_dec(&system_freezing_cnt); |
| 180 | if (!freezable(p)) | 172 | pm_freezing = false; |
| 181 | continue; | 173 | pm_nosig_freezing = false; |
| 182 | 174 | ||
| 183 | if (nosig_only && should_send_signal(p)) | 175 | oom_killer_enable(); |
| 184 | continue; | 176 | |
| 177 | printk("Restarting tasks ... "); | ||
| 185 | 178 | ||
| 186 | if (cgroup_freezing_or_frozen(p)) | 179 | thaw_workqueues(); |
| 187 | continue; | ||
| 188 | 180 | ||
| 189 | thaw_process(p); | 181 | read_lock(&tasklist_lock); |
| 182 | do_each_thread(g, p) { | ||
| 183 | __thaw_task(p); | ||
| 190 | } while_each_thread(g, p); | 184 | } while_each_thread(g, p); |
| 191 | read_unlock(&tasklist_lock); | 185 | read_unlock(&tasklist_lock); |
| 192 | } | ||
| 193 | 186 | ||
| 194 | void thaw_processes(void) | ||
| 195 | { | ||
| 196 | oom_killer_enable(); | ||
| 197 | |||
| 198 | printk("Restarting tasks ... "); | ||
| 199 | thaw_workqueues(); | ||
| 200 | thaw_tasks(true); | ||
| 201 | thaw_tasks(false); | ||
| 202 | schedule(); | 187 | schedule(); |
| 203 | printk("done.\n"); | 188 | printk("done.\n"); |
| 204 | } | 189 | } |
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index cbe2c1441392..1cf88900ec4f 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
| @@ -858,6 +858,9 @@ static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) | |||
| 858 | PageReserved(page)) | 858 | PageReserved(page)) |
| 859 | return NULL; | 859 | return NULL; |
| 860 | 860 | ||
| 861 | if (page_is_guard(page)) | ||
| 862 | return NULL; | ||
| 863 | |||
| 861 | return page; | 864 | return page; |
| 862 | } | 865 | } |
| 863 | 866 | ||
| @@ -920,6 +923,9 @@ static struct page *saveable_page(struct zone *zone, unsigned long pfn) | |||
| 920 | && (!kernel_page_present(page) || pfn_is_nosave(pfn))) | 923 | && (!kernel_page_present(page) || pfn_is_nosave(pfn))) |
| 921 | return NULL; | 924 | return NULL; |
| 922 | 925 | ||
| 926 | if (page_is_guard(page)) | ||
| 927 | return NULL; | ||
| 928 | |||
| 923 | return page; | 929 | return page; |
| 924 | } | 930 | } |
| 925 | 931 | ||
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 4953dc054c53..4fd51beed879 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c | |||
| @@ -42,9 +42,9 @@ static const struct platform_suspend_ops *suspend_ops; | |||
| 42 | */ | 42 | */ |
| 43 | void suspend_set_ops(const struct platform_suspend_ops *ops) | 43 | void suspend_set_ops(const struct platform_suspend_ops *ops) |
| 44 | { | 44 | { |
| 45 | mutex_lock(&pm_mutex); | 45 | lock_system_sleep(); |
| 46 | suspend_ops = ops; | 46 | suspend_ops = ops; |
| 47 | mutex_unlock(&pm_mutex); | 47 | unlock_system_sleep(); |
| 48 | } | 48 | } |
| 49 | EXPORT_SYMBOL_GPL(suspend_set_ops); | 49 | EXPORT_SYMBOL_GPL(suspend_set_ops); |
| 50 | 50 | ||
| @@ -106,13 +106,11 @@ static int suspend_prepare(void) | |||
| 106 | goto Finish; | 106 | goto Finish; |
| 107 | 107 | ||
| 108 | error = suspend_freeze_processes(); | 108 | error = suspend_freeze_processes(); |
| 109 | if (error) { | 109 | if (!error) |
| 110 | suspend_stats.failed_freeze++; | ||
| 111 | dpm_save_failed_step(SUSPEND_FREEZE); | ||
| 112 | } else | ||
| 113 | return 0; | 110 | return 0; |
| 114 | 111 | ||
| 115 | suspend_thaw_processes(); | 112 | suspend_stats.failed_freeze++; |
| 113 | dpm_save_failed_step(SUSPEND_FREEZE); | ||
| 116 | usermodehelper_enable(); | 114 | usermodehelper_enable(); |
| 117 | Finish: | 115 | Finish: |
| 118 | pm_notifier_call_chain(PM_POST_SUSPEND); | 116 | pm_notifier_call_chain(PM_POST_SUSPEND); |
diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 11a594c4ba25..8742fd013a94 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c | |||
| @@ -18,7 +18,6 @@ | |||
| 18 | #include <linux/bitops.h> | 18 | #include <linux/bitops.h> |
| 19 | #include <linux/genhd.h> | 19 | #include <linux/genhd.h> |
| 20 | #include <linux/device.h> | 20 | #include <linux/device.h> |
| 21 | #include <linux/buffer_head.h> | ||
| 22 | #include <linux/bio.h> | 21 | #include <linux/bio.h> |
| 23 | #include <linux/blkdev.h> | 22 | #include <linux/blkdev.h> |
| 24 | #include <linux/swap.h> | 23 | #include <linux/swap.h> |
| @@ -774,8 +773,7 @@ static int enough_swap(unsigned int nr_pages, unsigned int flags) | |||
| 774 | 773 | ||
| 775 | pr_debug("PM: Free swap pages: %u\n", free_swap); | 774 | pr_debug("PM: Free swap pages: %u\n", free_swap); |
| 776 | 775 | ||
| 777 | required = PAGES_FOR_IO + ((flags & SF_NOCOMPRESS_MODE) ? | 776 | required = PAGES_FOR_IO + nr_pages; |
| 778 | nr_pages : (nr_pages * LZO_CMP_PAGES) / LZO_UNC_PAGES + 1); | ||
| 779 | return free_swap > required; | 777 | return free_swap > required; |
| 780 | } | 778 | } |
| 781 | 779 | ||
| @@ -803,10 +801,12 @@ int swsusp_write(unsigned int flags) | |||
| 803 | printk(KERN_ERR "PM: Cannot get swap writer\n"); | 801 | printk(KERN_ERR "PM: Cannot get swap writer\n"); |
| 804 | return error; | 802 | return error; |
| 805 | } | 803 | } |
| 806 | if (!enough_swap(pages, flags)) { | 804 | if (flags & SF_NOCOMPRESS_MODE) { |
| 807 | printk(KERN_ERR "PM: Not enough free swap\n"); | 805 | if (!enough_swap(pages, flags)) { |
| 808 | error = -ENOSPC; | 806 | printk(KERN_ERR "PM: Not enough free swap\n"); |
| 809 | goto out_finish; | 807 | error = -ENOSPC; |
| 808 | goto out_finish; | ||
| 809 | } | ||
| 810 | } | 810 | } |
| 811 | memset(&snapshot, 0, sizeof(struct snapshot_handle)); | 811 | memset(&snapshot, 0, sizeof(struct snapshot_handle)); |
| 812 | error = snapshot_read_next(&snapshot); | 812 | error = snapshot_read_next(&snapshot); |
diff --git a/kernel/power/user.c b/kernel/power/user.c index 6d8f535c2b88..6b1ab7a88522 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include <linux/swapops.h> | 21 | #include <linux/swapops.h> |
| 22 | #include <linux/pm.h> | 22 | #include <linux/pm.h> |
| 23 | #include <linux/fs.h> | 23 | #include <linux/fs.h> |
| 24 | #include <linux/compat.h> | ||
| 24 | #include <linux/console.h> | 25 | #include <linux/console.h> |
| 25 | #include <linux/cpu.h> | 26 | #include <linux/cpu.h> |
| 26 | #include <linux/freezer.h> | 27 | #include <linux/freezer.h> |
| @@ -30,28 +31,6 @@ | |||
| 30 | 31 | ||
| 31 | #include "power.h" | 32 | #include "power.h" |
| 32 | 33 | ||
| 33 | /* | ||
| 34 | * NOTE: The SNAPSHOT_SET_SWAP_FILE and SNAPSHOT_PMOPS ioctls are obsolete and | ||
| 35 | * will be removed in the future. They are only preserved here for | ||
| 36 | * compatibility with existing userland utilities. | ||
| 37 | */ | ||
| 38 | #define SNAPSHOT_SET_SWAP_FILE _IOW(SNAPSHOT_IOC_MAGIC, 10, unsigned int) | ||
| 39 | #define SNAPSHOT_PMOPS _IOW(SNAPSHOT_IOC_MAGIC, 12, unsigned int) | ||
| 40 | |||
| 41 | #define PMOPS_PREPARE 1 | ||
| 42 | #define PMOPS_ENTER 2 | ||
| 43 | #define PMOPS_FINISH 3 | ||
| 44 | |||
| 45 | /* | ||
| 46 | * NOTE: The following ioctl definitions are wrong and have been replaced with | ||
| 47 | * correct ones. They are only preserved here for compatibility with existing | ||
| 48 | * userland utilities and will be removed in the future. | ||
| 49 | */ | ||
| 50 | #define SNAPSHOT_ATOMIC_SNAPSHOT _IOW(SNAPSHOT_IOC_MAGIC, 3, void *) | ||
| 51 | #define SNAPSHOT_SET_IMAGE_SIZE _IOW(SNAPSHOT_IOC_MAGIC, 6, unsigned long) | ||
| 52 | #define SNAPSHOT_AVAIL_SWAP _IOR(SNAPSHOT_IOC_MAGIC, 7, void *) | ||
| 53 | #define SNAPSHOT_GET_SWAP_PAGE _IOR(SNAPSHOT_IOC_MAGIC, 8, void *) | ||
| 54 | |||
| 55 | 34 | ||
| 56 | #define SNAPSHOT_MINOR 231 | 35 | #define SNAPSHOT_MINOR 231 |
| 57 | 36 | ||
| @@ -71,7 +50,7 @@ static int snapshot_open(struct inode *inode, struct file *filp) | |||
| 71 | struct snapshot_data *data; | 50 | struct snapshot_data *data; |
| 72 | int error; | 51 | int error; |
| 73 | 52 | ||
| 74 | mutex_lock(&pm_mutex); | 53 | lock_system_sleep(); |
| 75 | 54 | ||
| 76 | if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { | 55 | if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { |
| 77 | error = -EBUSY; | 56 | error = -EBUSY; |
| @@ -123,7 +102,7 @@ static int snapshot_open(struct inode *inode, struct file *filp) | |||
| 123 | data->platform_support = 0; | 102 | data->platform_support = 0; |
| 124 | 103 | ||
| 125 | Unlock: | 104 | Unlock: |
| 126 | mutex_unlock(&pm_mutex); | 105 | unlock_system_sleep(); |
| 127 | 106 | ||
| 128 | return error; | 107 | return error; |
| 129 | } | 108 | } |
| @@ -132,7 +111,7 @@ static int snapshot_release(struct inode *inode, struct file *filp) | |||
| 132 | { | 111 | { |
| 133 | struct snapshot_data *data; | 112 | struct snapshot_data *data; |
| 134 | 113 | ||
| 135 | mutex_lock(&pm_mutex); | 114 | lock_system_sleep(); |
| 136 | 115 | ||
| 137 | swsusp_free(); | 116 | swsusp_free(); |
| 138 | free_basic_memory_bitmaps(); | 117 | free_basic_memory_bitmaps(); |
| @@ -146,7 +125,7 @@ static int snapshot_release(struct inode *inode, struct file *filp) | |||
| 146 | PM_POST_HIBERNATION : PM_POST_RESTORE); | 125 | PM_POST_HIBERNATION : PM_POST_RESTORE); |
| 147 | atomic_inc(&snapshot_device_available); | 126 | atomic_inc(&snapshot_device_available); |
| 148 | 127 | ||
| 149 | mutex_unlock(&pm_mutex); | 128 | unlock_system_sleep(); |
| 150 | 129 | ||
| 151 | return 0; | 130 | return 0; |
| 152 | } | 131 | } |
| @@ -158,7 +137,7 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf, | |||
| 158 | ssize_t res; | 137 | ssize_t res; |
| 159 | loff_t pg_offp = *offp & ~PAGE_MASK; | 138 | loff_t pg_offp = *offp & ~PAGE_MASK; |
| 160 | 139 | ||
| 161 | mutex_lock(&pm_mutex); | 140 | lock_system_sleep(); |
| 162 | 141 | ||
| 163 | data = filp->private_data; | 142 | data = filp->private_data; |
| 164 | if (!data->ready) { | 143 | if (!data->ready) { |
| @@ -179,7 +158,7 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf, | |||
| 179 | *offp += res; | 158 | *offp += res; |
| 180 | 159 | ||
| 181 | Unlock: | 160 | Unlock: |
| 182 | mutex_unlock(&pm_mutex); | 161 | unlock_system_sleep(); |
| 183 | 162 | ||
| 184 | return res; | 163 | return res; |
| 185 | } | 164 | } |
| @@ -191,7 +170,7 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, | |||
| 191 | ssize_t res; | 170 | ssize_t res; |
| 192 | loff_t pg_offp = *offp & ~PAGE_MASK; | 171 | loff_t pg_offp = *offp & ~PAGE_MASK; |
| 193 | 172 | ||
| 194 | mutex_lock(&pm_mutex); | 173 | lock_system_sleep(); |
| 195 | 174 | ||
| 196 | data = filp->private_data; | 175 | data = filp->private_data; |
| 197 | 176 | ||
| @@ -208,20 +187,11 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, | |||
| 208 | if (res > 0) | 187 | if (res > 0) |
| 209 | *offp += res; | 188 | *offp += res; |
| 210 | unlock: | 189 | unlock: |
| 211 | mutex_unlock(&pm_mutex); | 190 | unlock_system_sleep(); |
| 212 | 191 | ||
| 213 | return res; | 192 | return res; |
| 214 | } | 193 | } |
| 215 | 194 | ||
| 216 | static void snapshot_deprecated_ioctl(unsigned int cmd) | ||
| 217 | { | ||
| 218 | if (printk_ratelimit()) | ||
| 219 | printk(KERN_NOTICE "%pf: ioctl '%.8x' is deprecated and will " | ||
| 220 | "be removed soon, update your suspend-to-disk " | ||
| 221 | "utilities\n", | ||
| 222 | __builtin_return_address(0), cmd); | ||
| 223 | } | ||
| 224 | |||
| 225 | static long snapshot_ioctl(struct file *filp, unsigned int cmd, | 195 | static long snapshot_ioctl(struct file *filp, unsigned int cmd, |
| 226 | unsigned long arg) | 196 | unsigned long arg) |
| 227 | { | 197 | { |
| @@ -257,11 +227,9 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, | |||
| 257 | break; | 227 | break; |
| 258 | 228 | ||
| 259 | error = freeze_processes(); | 229 | error = freeze_processes(); |
| 260 | if (error) { | 230 | if (error) |
| 261 | thaw_processes(); | ||
| 262 | usermodehelper_enable(); | 231 | usermodehelper_enable(); |
| 263 | } | 232 | else |
| 264 | if (!error) | ||
| 265 | data->frozen = 1; | 233 | data->frozen = 1; |
| 266 | break; | 234 | break; |
| 267 | 235 | ||
| @@ -274,8 +242,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, | |||
| 274 | data->frozen = 0; | 242 | data->frozen = 0; |
| 275 | break; | 243 | break; |
| 276 | 244 | ||
| 277 | case SNAPSHOT_ATOMIC_SNAPSHOT: | ||
| 278 | snapshot_deprecated_ioctl(cmd); | ||
| 279 | case SNAPSHOT_CREATE_IMAGE: | 245 | case SNAPSHOT_CREATE_IMAGE: |
| 280 | if (data->mode != O_RDONLY || !data->frozen || data->ready) { | 246 | if (data->mode != O_RDONLY || !data->frozen || data->ready) { |
| 281 | error = -EPERM; | 247 | error = -EPERM; |
| @@ -283,10 +249,15 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, | |||
| 283 | } | 249 | } |
| 284 | pm_restore_gfp_mask(); | 250 | pm_restore_gfp_mask(); |
| 285 | error = hibernation_snapshot(data->platform_support); | 251 | error = hibernation_snapshot(data->platform_support); |
| 286 | if (!error) | 252 | if (!error) { |
| 287 | error = put_user(in_suspend, (int __user *)arg); | 253 | error = put_user(in_suspend, (int __user *)arg); |
| 288 | if (!error) | 254 | if (!error && !freezer_test_done) |
| 289 | data->ready = 1; | 255 | data->ready = 1; |
| 256 | if (freezer_test_done) { | ||
| 257 | freezer_test_done = false; | ||
| 258 | thaw_processes(); | ||
| 259 | } | ||
| 260 | } | ||
| 290 | break; | 261 | break; |
| 291 | 262 | ||
| 292 | case SNAPSHOT_ATOMIC_RESTORE: | 263 | case SNAPSHOT_ATOMIC_RESTORE: |
| @@ -305,8 +276,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, | |||
| 305 | data->ready = 0; | 276 | data->ready = 0; |
| 306 | break; | 277 | break; |
| 307 | 278 | ||
| 308 | case SNAPSHOT_SET_IMAGE_SIZE: | ||
| 309 | snapshot_deprecated_ioctl(cmd); | ||
| 310 | case SNAPSHOT_PREF_IMAGE_SIZE: | 279 | case SNAPSHOT_PREF_IMAGE_SIZE: |
| 311 | image_size = arg; | 280 | image_size = arg; |
| 312 | break; | 281 | break; |
| @@ -321,16 +290,12 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, | |||
| 321 | error = put_user(size, (loff_t __user *)arg); | 290 | error = put_user(size, (loff_t __user *)arg); |
| 322 | break; | 291 | break; |
| 323 | 292 | ||
| 324 | case SNAPSHOT_AVAIL_SWAP: | ||
| 325 | snapshot_deprecated_ioctl(cmd); | ||
| 326 | case SNAPSHOT_AVAIL_SWAP_SIZE: | 293 | case SNAPSHOT_AVAIL_SWAP_SIZE: |
| 327 | size = count_swap_pages(data->swap, 1); | 294 | size = count_swap_pages(data->swap, 1); |
| 328 | size <<= PAGE_SHIFT; | 295 | size <<= PAGE_SHIFT; |
| 329 | error = put_user(size, (loff_t __user *)arg); | 296 | error = put_user(size, (loff_t __user *)arg); |
| 330 | break; | 297 | break; |
| 331 | 298 | ||
| 332 | case SNAPSHOT_GET_SWAP_PAGE: | ||
| 333 | snapshot_deprecated_ioctl(cmd); | ||
| 334 | case SNAPSHOT_ALLOC_SWAP_PAGE: | 299 | case SNAPSHOT_ALLOC_SWAP_PAGE: |
| 335 | if (data->swap < 0 || data->swap >= MAX_SWAPFILES) { | 300 | if (data->swap < 0 || data->swap >= MAX_SWAPFILES) { |
| 336 | error = -ENODEV; | 301 | error = -ENODEV; |
| @@ -353,27 +318,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, | |||
| 353 | free_all_swap_pages(data->swap); | 318 | free_all_swap_pages(data->swap); |
| 354 | break; | 319 | break; |
| 355 | 320 | ||
| 356 | case SNAPSHOT_SET_SWAP_FILE: /* This ioctl is deprecated */ | ||
| 357 | snapshot_deprecated_ioctl(cmd); | ||
| 358 | if (!swsusp_swap_in_use()) { | ||
| 359 | /* | ||
| 360 | * User space encodes device types as two-byte values, | ||
| 361 | * so we need to recode them | ||
| 362 | */ | ||
| 363 | if (old_decode_dev(arg)) { | ||
| 364 | data->swap = swap_type_of(old_decode_dev(arg), | ||
| 365 | 0, NULL); | ||
| 366 | if (data->swap < 0) | ||
| 367 | error = -ENODEV; | ||
| 368 | } else { | ||
| 369 | data->swap = -1; | ||
| 370 | error = -EINVAL; | ||
| 371 | } | ||
| 372 | } else { | ||
| 373 | error = -EPERM; | ||
| 374 | } | ||
| 375 | break; | ||
| 376 | |||
| 377 | case SNAPSHOT_S2RAM: | 321 | case SNAPSHOT_S2RAM: |
| 378 | if (!data->frozen) { | 322 | if (!data->frozen) { |
| 379 | error = -EPERM; | 323 | error = -EPERM; |
| @@ -396,33 +340,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, | |||
| 396 | error = hibernation_platform_enter(); | 340 | error = hibernation_platform_enter(); |
| 397 | break; | 341 | break; |
| 398 | 342 | ||
| 399 | case SNAPSHOT_PMOPS: /* This ioctl is deprecated */ | ||
| 400 | snapshot_deprecated_ioctl(cmd); | ||
| 401 | error = -EINVAL; | ||
| 402 | |||
| 403 | switch (arg) { | ||
| 404 | |||
| 405 | case PMOPS_PREPARE: | ||
| 406 | data->platform_support = 1; | ||
| 407 | error = 0; | ||
| 408 | break; | ||
| 409 | |||
| 410 | case PMOPS_ENTER: | ||
| 411 | if (data->platform_support) | ||
| 412 | error = hibernation_platform_enter(); | ||
| 413 | break; | ||
| 414 | |||
| 415 | case PMOPS_FINISH: | ||
| 416 | if (data->platform_support) | ||
| 417 | error = 0; | ||
| 418 | break; | ||
| 419 | |||
| 420 | default: | ||
| 421 | printk(KERN_ERR "SNAPSHOT_PMOPS: invalid argument %ld\n", arg); | ||
| 422 | |||
| 423 | } | ||
| 424 | break; | ||
| 425 | |||
| 426 | case SNAPSHOT_SET_SWAP_AREA: | 343 | case SNAPSHOT_SET_SWAP_AREA: |
| 427 | if (swsusp_swap_in_use()) { | 344 | if (swsusp_swap_in_use()) { |
| 428 | error = -EPERM; | 345 | error = -EPERM; |
| @@ -464,6 +381,66 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, | |||
| 464 | return error; | 381 | return error; |
| 465 | } | 382 | } |
| 466 | 383 | ||
| 384 | #ifdef CONFIG_COMPAT | ||
| 385 | |||
| 386 | struct compat_resume_swap_area { | ||
| 387 | compat_loff_t offset; | ||
| 388 | u32 dev; | ||
| 389 | } __packed; | ||
| 390 | |||
| 391 | static long | ||
| 392 | snapshot_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | ||
| 393 | { | ||
| 394 | BUILD_BUG_ON(sizeof(loff_t) != sizeof(compat_loff_t)); | ||
| 395 | |||
| 396 | switch (cmd) { | ||
| 397 | case SNAPSHOT_GET_IMAGE_SIZE: | ||
| 398 | case SNAPSHOT_AVAIL_SWAP_SIZE: | ||
| 399 | case SNAPSHOT_ALLOC_SWAP_PAGE: { | ||
| 400 | compat_loff_t __user *uoffset = compat_ptr(arg); | ||
| 401 | loff_t offset; | ||
| 402 | mm_segment_t old_fs; | ||
| 403 | int err; | ||
| 404 | |||
| 405 | old_fs = get_fs(); | ||
| 406 | set_fs(KERNEL_DS); | ||
| 407 | err = snapshot_ioctl(file, cmd, (unsigned long) &offset); | ||
| 408 | set_fs(old_fs); | ||
| 409 | if (!err && put_user(offset, uoffset)) | ||
| 410 | err = -EFAULT; | ||
| 411 | return err; | ||
| 412 | } | ||
| 413 | |||
| 414 | case SNAPSHOT_CREATE_IMAGE: | ||
| 415 | return snapshot_ioctl(file, cmd, | ||
| 416 | (unsigned long) compat_ptr(arg)); | ||
| 417 | |||
| 418 | case SNAPSHOT_SET_SWAP_AREA: { | ||
| 419 | struct compat_resume_swap_area __user *u_swap_area = | ||
| 420 | compat_ptr(arg); | ||
| 421 | struct resume_swap_area swap_area; | ||
| 422 | mm_segment_t old_fs; | ||
| 423 | int err; | ||
| 424 | |||
| 425 | err = get_user(swap_area.offset, &u_swap_area->offset); | ||
| 426 | err |= get_user(swap_area.dev, &u_swap_area->dev); | ||
| 427 | if (err) | ||
| 428 | return -EFAULT; | ||
| 429 | old_fs = get_fs(); | ||
| 430 | set_fs(KERNEL_DS); | ||
| 431 | err = snapshot_ioctl(file, SNAPSHOT_SET_SWAP_AREA, | ||
| 432 | (unsigned long) &swap_area); | ||
| 433 | set_fs(old_fs); | ||
| 434 | return err; | ||
| 435 | } | ||
| 436 | |||
| 437 | default: | ||
| 438 | return snapshot_ioctl(file, cmd, arg); | ||
| 439 | } | ||
| 440 | } | ||
| 441 | |||
| 442 | #endif /* CONFIG_COMPAT */ | ||
| 443 | |||
| 467 | static const struct file_operations snapshot_fops = { | 444 | static const struct file_operations snapshot_fops = { |
| 468 | .open = snapshot_open, | 445 | .open = snapshot_open, |
| 469 | .release = snapshot_release, | 446 | .release = snapshot_release, |
| @@ -471,6 +448,9 @@ static const struct file_operations snapshot_fops = { | |||
| 471 | .write = snapshot_write, | 448 | .write = snapshot_write, |
| 472 | .llseek = no_llseek, | 449 | .llseek = no_llseek, |
| 473 | .unlocked_ioctl = snapshot_ioctl, | 450 | .unlocked_ioctl = snapshot_ioctl, |
| 451 | #ifdef CONFIG_COMPAT | ||
| 452 | .compat_ioctl = snapshot_compat_ioctl, | ||
| 453 | #endif | ||
| 474 | }; | 454 | }; |
| 475 | 455 | ||
| 476 | static struct miscdevice snapshot_device = { | 456 | static struct miscdevice snapshot_device = { |
diff --git a/kernel/printk.c b/kernel/printk.c index 989e4a52da76..13c0a1143f49 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
| @@ -521,7 +521,7 @@ static void __call_console_drivers(unsigned start, unsigned end) | |||
| 521 | } | 521 | } |
| 522 | } | 522 | } |
| 523 | 523 | ||
| 524 | static int __read_mostly ignore_loglevel; | 524 | static bool __read_mostly ignore_loglevel; |
| 525 | 525 | ||
| 526 | static int __init ignore_loglevel_setup(char *str) | 526 | static int __init ignore_loglevel_setup(char *str) |
| 527 | { | 527 | { |
| @@ -532,7 +532,7 @@ static int __init ignore_loglevel_setup(char *str) | |||
| 532 | } | 532 | } |
| 533 | 533 | ||
| 534 | early_param("ignore_loglevel", ignore_loglevel_setup); | 534 | early_param("ignore_loglevel", ignore_loglevel_setup); |
| 535 | module_param_named(ignore_loglevel, ignore_loglevel, bool, S_IRUGO | S_IWUSR); | 535 | module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR); |
| 536 | MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to" | 536 | MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to" |
| 537 | "print all kernel messages to the console."); | 537 | "print all kernel messages to the console."); |
| 538 | 538 | ||
| @@ -696,9 +696,9 @@ static void zap_locks(void) | |||
| 696 | } | 696 | } |
| 697 | 697 | ||
| 698 | #if defined(CONFIG_PRINTK_TIME) | 698 | #if defined(CONFIG_PRINTK_TIME) |
| 699 | static int printk_time = 1; | 699 | static bool printk_time = 1; |
| 700 | #else | 700 | #else |
| 701 | static int printk_time = 0; | 701 | static bool printk_time = 0; |
| 702 | #endif | 702 | #endif |
| 703 | module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); | 703 | module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); |
| 704 | 704 | ||
| @@ -1098,7 +1098,7 @@ int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, cha | |||
| 1098 | return -1; | 1098 | return -1; |
| 1099 | } | 1099 | } |
| 1100 | 1100 | ||
| 1101 | int console_suspend_enabled = 1; | 1101 | bool console_suspend_enabled = 1; |
| 1102 | EXPORT_SYMBOL(console_suspend_enabled); | 1102 | EXPORT_SYMBOL(console_suspend_enabled); |
| 1103 | 1103 | ||
| 1104 | static int __init console_suspend_disable(char *str) | 1104 | static int __init console_suspend_disable(char *str) |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 78ab24a7b0e4..00ab2ca5ed11 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
| @@ -172,6 +172,14 @@ int ptrace_check_attach(struct task_struct *child, bool ignore_state) | |||
| 172 | return ret; | 172 | return ret; |
| 173 | } | 173 | } |
| 174 | 174 | ||
| 175 | static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode) | ||
| 176 | { | ||
| 177 | if (mode & PTRACE_MODE_NOAUDIT) | ||
| 178 | return has_ns_capability_noaudit(current, ns, CAP_SYS_PTRACE); | ||
| 179 | else | ||
| 180 | return has_ns_capability(current, ns, CAP_SYS_PTRACE); | ||
| 181 | } | ||
| 182 | |||
| 175 | int __ptrace_may_access(struct task_struct *task, unsigned int mode) | 183 | int __ptrace_may_access(struct task_struct *task, unsigned int mode) |
| 176 | { | 184 | { |
| 177 | const struct cred *cred = current_cred(), *tcred; | 185 | const struct cred *cred = current_cred(), *tcred; |
| @@ -198,7 +206,7 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode) | |||
| 198 | cred->gid == tcred->sgid && | 206 | cred->gid == tcred->sgid && |
| 199 | cred->gid == tcred->gid)) | 207 | cred->gid == tcred->gid)) |
| 200 | goto ok; | 208 | goto ok; |
| 201 | if (ns_capable(tcred->user->user_ns, CAP_SYS_PTRACE)) | 209 | if (ptrace_has_cap(tcred->user->user_ns, mode)) |
| 202 | goto ok; | 210 | goto ok; |
| 203 | rcu_read_unlock(); | 211 | rcu_read_unlock(); |
| 204 | return -EPERM; | 212 | return -EPERM; |
| @@ -207,7 +215,7 @@ ok: | |||
| 207 | smp_rmb(); | 215 | smp_rmb(); |
| 208 | if (task->mm) | 216 | if (task->mm) |
| 209 | dumpable = get_dumpable(task->mm); | 217 | dumpable = get_dumpable(task->mm); |
| 210 | if (!dumpable && !task_ns_capable(task, CAP_SYS_PTRACE)) | 218 | if (!dumpable && !ptrace_has_cap(task_user_ns(task), mode)) |
| 211 | return -EPERM; | 219 | return -EPERM; |
| 212 | 220 | ||
| 213 | return security_ptrace_access_check(task, mode); | 221 | return security_ptrace_access_check(task, mode); |
| @@ -277,7 +285,7 @@ static int ptrace_attach(struct task_struct *task, long request, | |||
| 277 | task->ptrace = PT_PTRACED; | 285 | task->ptrace = PT_PTRACED; |
| 278 | if (seize) | 286 | if (seize) |
| 279 | task->ptrace |= PT_SEIZED; | 287 | task->ptrace |= PT_SEIZED; |
| 280 | if (task_ns_capable(task, CAP_SYS_PTRACE)) | 288 | if (ns_capable(task_user_ns(task), CAP_SYS_PTRACE)) |
| 281 | task->ptrace |= PT_PTRACE_CAP; | 289 | task->ptrace |= PT_PTRACE_CAP; |
| 282 | 290 | ||
| 283 | __ptrace_link(task, current); | 291 | __ptrace_link(task, current); |
diff --git a/kernel/relay.c b/kernel/relay.c index 226fade4d727..4335e1d7ee2d 100644 --- a/kernel/relay.c +++ b/kernel/relay.c | |||
| @@ -302,7 +302,7 @@ static void buf_unmapped_default_callback(struct rchan_buf *buf, | |||
| 302 | */ | 302 | */ |
| 303 | static struct dentry *create_buf_file_default_callback(const char *filename, | 303 | static struct dentry *create_buf_file_default_callback(const char *filename, |
| 304 | struct dentry *parent, | 304 | struct dentry *parent, |
| 305 | int mode, | 305 | umode_t mode, |
| 306 | struct rchan_buf *buf, | 306 | struct rchan_buf *buf, |
| 307 | int *is_global) | 307 | int *is_global) |
| 308 | { | 308 | { |
diff --git a/kernel/res_counter.c b/kernel/res_counter.c index 34683efa2cce..6d269cce7aa1 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c | |||
| @@ -159,8 +159,7 @@ int res_counter_memparse_write_strategy(const char *buf, | |||
| 159 | return 0; | 159 | return 0; |
| 160 | } | 160 | } |
| 161 | 161 | ||
| 162 | /* FIXME - make memparse() take const char* args */ | 162 | *res = memparse(buf, &end); |
| 163 | *res = memparse((char *)buf, &end); | ||
| 164 | if (*end != '\0') | 163 | if (*end != '\0') |
| 165 | return -EINVAL; | 164 | return -EINVAL; |
| 166 | 165 | ||
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c index 3d9f31cd79e7..98ec49475460 100644 --- a/kernel/rtmutex-tester.c +++ b/kernel/rtmutex-tester.c | |||
| @@ -6,11 +6,11 @@ | |||
| 6 | * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> | 6 | * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> |
| 7 | * | 7 | * |
| 8 | */ | 8 | */ |
| 9 | #include <linux/device.h> | ||
| 9 | #include <linux/kthread.h> | 10 | #include <linux/kthread.h> |
| 10 | #include <linux/export.h> | 11 | #include <linux/export.h> |
| 11 | #include <linux/sched.h> | 12 | #include <linux/sched.h> |
| 12 | #include <linux/spinlock.h> | 13 | #include <linux/spinlock.h> |
| 13 | #include <linux/sysdev.h> | ||
| 14 | #include <linux/timer.h> | 14 | #include <linux/timer.h> |
| 15 | #include <linux/freezer.h> | 15 | #include <linux/freezer.h> |
| 16 | 16 | ||
| @@ -27,7 +27,7 @@ struct test_thread_data { | |||
| 27 | int opdata; | 27 | int opdata; |
| 28 | int mutexes[MAX_RT_TEST_MUTEXES]; | 28 | int mutexes[MAX_RT_TEST_MUTEXES]; |
| 29 | int event; | 29 | int event; |
| 30 | struct sys_device sysdev; | 30 | struct device dev; |
| 31 | }; | 31 | }; |
| 32 | 32 | ||
| 33 | static struct test_thread_data thread_data[MAX_RT_TEST_THREADS]; | 33 | static struct test_thread_data thread_data[MAX_RT_TEST_THREADS]; |
| @@ -271,7 +271,7 @@ static int test_func(void *data) | |||
| 271 | * | 271 | * |
| 272 | * opcode:data | 272 | * opcode:data |
| 273 | */ | 273 | */ |
| 274 | static ssize_t sysfs_test_command(struct sys_device *dev, struct sysdev_attribute *attr, | 274 | static ssize_t sysfs_test_command(struct device *dev, struct device_attribute *attr, |
| 275 | const char *buf, size_t count) | 275 | const char *buf, size_t count) |
| 276 | { | 276 | { |
| 277 | struct sched_param schedpar; | 277 | struct sched_param schedpar; |
| @@ -279,8 +279,8 @@ static ssize_t sysfs_test_command(struct sys_device *dev, struct sysdev_attribut | |||
| 279 | char cmdbuf[32]; | 279 | char cmdbuf[32]; |
| 280 | int op, dat, tid, ret; | 280 | int op, dat, tid, ret; |
| 281 | 281 | ||
| 282 | td = container_of(dev, struct test_thread_data, sysdev); | 282 | td = container_of(dev, struct test_thread_data, dev); |
| 283 | tid = td->sysdev.id; | 283 | tid = td->dev.id; |
| 284 | 284 | ||
| 285 | /* strings from sysfs write are not 0 terminated! */ | 285 | /* strings from sysfs write are not 0 terminated! */ |
| 286 | if (count >= sizeof(cmdbuf)) | 286 | if (count >= sizeof(cmdbuf)) |
| @@ -334,7 +334,7 @@ static ssize_t sysfs_test_command(struct sys_device *dev, struct sysdev_attribut | |||
| 334 | * @dev: thread to query | 334 | * @dev: thread to query |
| 335 | * @buf: char buffer to be filled with thread status info | 335 | * @buf: char buffer to be filled with thread status info |
| 336 | */ | 336 | */ |
| 337 | static ssize_t sysfs_test_status(struct sys_device *dev, struct sysdev_attribute *attr, | 337 | static ssize_t sysfs_test_status(struct device *dev, struct device_attribute *attr, |
| 338 | char *buf) | 338 | char *buf) |
| 339 | { | 339 | { |
| 340 | struct test_thread_data *td; | 340 | struct test_thread_data *td; |
| @@ -342,8 +342,8 @@ static ssize_t sysfs_test_status(struct sys_device *dev, struct sysdev_attribute | |||
| 342 | char *curr = buf; | 342 | char *curr = buf; |
| 343 | int i; | 343 | int i; |
| 344 | 344 | ||
| 345 | td = container_of(dev, struct test_thread_data, sysdev); | 345 | td = container_of(dev, struct test_thread_data, dev); |
| 346 | tsk = threads[td->sysdev.id]; | 346 | tsk = threads[td->dev.id]; |
| 347 | 347 | ||
| 348 | spin_lock(&rttest_lock); | 348 | spin_lock(&rttest_lock); |
| 349 | 349 | ||
| @@ -360,28 +360,29 @@ static ssize_t sysfs_test_status(struct sys_device *dev, struct sysdev_attribute | |||
| 360 | spin_unlock(&rttest_lock); | 360 | spin_unlock(&rttest_lock); |
| 361 | 361 | ||
| 362 | curr += sprintf(curr, ", T: %p, R: %p\n", tsk, | 362 | curr += sprintf(curr, ", T: %p, R: %p\n", tsk, |
| 363 | mutexes[td->sysdev.id].owner); | 363 | mutexes[td->dev.id].owner); |
| 364 | 364 | ||
| 365 | return curr - buf; | 365 | return curr - buf; |
| 366 | } | 366 | } |
| 367 | 367 | ||
| 368 | static SYSDEV_ATTR(status, 0600, sysfs_test_status, NULL); | 368 | static DEVICE_ATTR(status, 0600, sysfs_test_status, NULL); |
| 369 | static SYSDEV_ATTR(command, 0600, NULL, sysfs_test_command); | 369 | static DEVICE_ATTR(command, 0600, NULL, sysfs_test_command); |
| 370 | 370 | ||
| 371 | static struct sysdev_class rttest_sysclass = { | 371 | static struct bus_type rttest_subsys = { |
| 372 | .name = "rttest", | 372 | .name = "rttest", |
| 373 | .dev_name = "rttest", | ||
| 373 | }; | 374 | }; |
| 374 | 375 | ||
| 375 | static int init_test_thread(int id) | 376 | static int init_test_thread(int id) |
| 376 | { | 377 | { |
| 377 | thread_data[id].sysdev.cls = &rttest_sysclass; | 378 | thread_data[id].dev.bus = &rttest_subsys; |
| 378 | thread_data[id].sysdev.id = id; | 379 | thread_data[id].dev.id = id; |
| 379 | 380 | ||
| 380 | threads[id] = kthread_run(test_func, &thread_data[id], "rt-test-%d", id); | 381 | threads[id] = kthread_run(test_func, &thread_data[id], "rt-test-%d", id); |
| 381 | if (IS_ERR(threads[id])) | 382 | if (IS_ERR(threads[id])) |
| 382 | return PTR_ERR(threads[id]); | 383 | return PTR_ERR(threads[id]); |
| 383 | 384 | ||
| 384 | return sysdev_register(&thread_data[id].sysdev); | 385 | return device_register(&thread_data[id].dev); |
| 385 | } | 386 | } |
| 386 | 387 | ||
| 387 | static int init_rttest(void) | 388 | static int init_rttest(void) |
| @@ -393,7 +394,7 @@ static int init_rttest(void) | |||
| 393 | for (i = 0; i < MAX_RT_TEST_MUTEXES; i++) | 394 | for (i = 0; i < MAX_RT_TEST_MUTEXES; i++) |
| 394 | rt_mutex_init(&mutexes[i]); | 395 | rt_mutex_init(&mutexes[i]); |
| 395 | 396 | ||
| 396 | ret = sysdev_class_register(&rttest_sysclass); | 397 | ret = subsys_system_register(&rttest_subsys, NULL); |
| 397 | if (ret) | 398 | if (ret) |
| 398 | return ret; | 399 | return ret; |
| 399 | 400 | ||
| @@ -401,10 +402,10 @@ static int init_rttest(void) | |||
| 401 | ret = init_test_thread(i); | 402 | ret = init_test_thread(i); |
| 402 | if (ret) | 403 | if (ret) |
| 403 | break; | 404 | break; |
| 404 | ret = sysdev_create_file(&thread_data[i].sysdev, &attr_status); | 405 | ret = device_create_file(&thread_data[i].dev, &dev_attr_status); |
| 405 | if (ret) | 406 | if (ret) |
| 406 | break; | 407 | break; |
| 407 | ret = sysdev_create_file(&thread_data[i].sysdev, &attr_command); | 408 | ret = device_create_file(&thread_data[i].dev, &dev_attr_command); |
| 408 | if (ret) | 409 | if (ret) |
| 409 | break; | 410 | break; |
| 410 | } | 411 | } |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4dbfd04a2148..df00cb09263e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
| @@ -4330,7 +4330,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) | |||
| 4330 | goto out_free_cpus_allowed; | 4330 | goto out_free_cpus_allowed; |
| 4331 | } | 4331 | } |
| 4332 | retval = -EPERM; | 4332 | retval = -EPERM; |
| 4333 | if (!check_same_owner(p) && !task_ns_capable(p, CAP_SYS_NICE)) | 4333 | if (!check_same_owner(p) && !ns_capable(task_user_ns(p), CAP_SYS_NICE)) |
| 4334 | goto out_unlock; | 4334 | goto out_unlock; |
| 4335 | 4335 | ||
| 4336 | retval = security_task_setscheduler(p); | 4336 | retval = security_task_setscheduler(p); |
| @@ -5176,7 +5176,7 @@ static void sd_free_ctl_entry(struct ctl_table **tablep) | |||
| 5176 | static void | 5176 | static void |
| 5177 | set_table_entry(struct ctl_table *entry, | 5177 | set_table_entry(struct ctl_table *entry, |
| 5178 | const char *procname, void *data, int maxlen, | 5178 | const char *procname, void *data, int maxlen, |
| 5179 | mode_t mode, proc_handler *proc_handler) | 5179 | umode_t mode, proc_handler *proc_handler) |
| 5180 | { | 5180 | { |
| 5181 | entry->procname = procname; | 5181 | entry->procname = procname; |
| 5182 | entry->data = data; | 5182 | entry->data = data; |
| @@ -6675,54 +6675,52 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) | |||
| 6675 | } | 6675 | } |
| 6676 | 6676 | ||
| 6677 | #ifdef CONFIG_SCHED_MC | 6677 | #ifdef CONFIG_SCHED_MC |
| 6678 | static ssize_t sched_mc_power_savings_show(struct sysdev_class *class, | 6678 | static ssize_t sched_mc_power_savings_show(struct device *dev, |
| 6679 | struct sysdev_class_attribute *attr, | 6679 | struct device_attribute *attr, |
| 6680 | char *page) | 6680 | char *buf) |
| 6681 | { | 6681 | { |
| 6682 | return sprintf(page, "%u\n", sched_mc_power_savings); | 6682 | return sprintf(buf, "%u\n", sched_mc_power_savings); |
| 6683 | } | 6683 | } |
| 6684 | static ssize_t sched_mc_power_savings_store(struct sysdev_class *class, | 6684 | static ssize_t sched_mc_power_savings_store(struct device *dev, |
| 6685 | struct sysdev_class_attribute *attr, | 6685 | struct device_attribute *attr, |
| 6686 | const char *buf, size_t count) | 6686 | const char *buf, size_t count) |
| 6687 | { | 6687 | { |
| 6688 | return sched_power_savings_store(buf, count, 0); | 6688 | return sched_power_savings_store(buf, count, 0); |
| 6689 | } | 6689 | } |
| 6690 | static SYSDEV_CLASS_ATTR(sched_mc_power_savings, 0644, | 6690 | static DEVICE_ATTR(sched_mc_power_savings, 0644, |
| 6691 | sched_mc_power_savings_show, | 6691 | sched_mc_power_savings_show, |
| 6692 | sched_mc_power_savings_store); | 6692 | sched_mc_power_savings_store); |
| 6693 | #endif | 6693 | #endif |
| 6694 | 6694 | ||
| 6695 | #ifdef CONFIG_SCHED_SMT | 6695 | #ifdef CONFIG_SCHED_SMT |
| 6696 | static ssize_t sched_smt_power_savings_show(struct sysdev_class *dev, | 6696 | static ssize_t sched_smt_power_savings_show(struct device *dev, |
| 6697 | struct sysdev_class_attribute *attr, | 6697 | struct device_attribute *attr, |
| 6698 | char *page) | 6698 | char *buf) |
| 6699 | { | 6699 | { |
| 6700 | return sprintf(page, "%u\n", sched_smt_power_savings); | 6700 | return sprintf(buf, "%u\n", sched_smt_power_savings); |
| 6701 | } | 6701 | } |
| 6702 | static ssize_t sched_smt_power_savings_store(struct sysdev_class *dev, | 6702 | static ssize_t sched_smt_power_savings_store(struct device *dev, |
| 6703 | struct sysdev_class_attribute *attr, | 6703 | struct device_attribute *attr, |
| 6704 | const char *buf, size_t count) | 6704 | const char *buf, size_t count) |
| 6705 | { | 6705 | { |
| 6706 | return sched_power_savings_store(buf, count, 1); | 6706 | return sched_power_savings_store(buf, count, 1); |
| 6707 | } | 6707 | } |
| 6708 | static SYSDEV_CLASS_ATTR(sched_smt_power_savings, 0644, | 6708 | static DEVICE_ATTR(sched_smt_power_savings, 0644, |
| 6709 | sched_smt_power_savings_show, | 6709 | sched_smt_power_savings_show, |
| 6710 | sched_smt_power_savings_store); | 6710 | sched_smt_power_savings_store); |
| 6711 | #endif | 6711 | #endif |
| 6712 | 6712 | ||
| 6713 | int __init sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) | 6713 | int __init sched_create_sysfs_power_savings_entries(struct device *dev) |
| 6714 | { | 6714 | { |
| 6715 | int err = 0; | 6715 | int err = 0; |
| 6716 | 6716 | ||
| 6717 | #ifdef CONFIG_SCHED_SMT | 6717 | #ifdef CONFIG_SCHED_SMT |
| 6718 | if (smt_capable()) | 6718 | if (smt_capable()) |
| 6719 | err = sysfs_create_file(&cls->kset.kobj, | 6719 | err = device_create_file(dev, &dev_attr_sched_smt_power_savings); |
| 6720 | &attr_sched_smt_power_savings.attr); | ||
| 6721 | #endif | 6720 | #endif |
| 6722 | #ifdef CONFIG_SCHED_MC | 6721 | #ifdef CONFIG_SCHED_MC |
| 6723 | if (!err && mc_capable()) | 6722 | if (!err && mc_capable()) |
| 6724 | err = sysfs_create_file(&cls->kset.kobj, | 6723 | err = device_create_file(dev, &dev_attr_sched_mc_power_savings); |
| 6725 | &attr_sched_mc_power_savings.attr); | ||
| 6726 | #endif | 6724 | #endif |
| 6727 | return err; | 6725 | return err; |
| 6728 | } | 6726 | } |
| @@ -7136,10 +7134,6 @@ void set_curr_task(int cpu, struct task_struct *p) | |||
| 7136 | 7134 | ||
| 7137 | #endif | 7135 | #endif |
| 7138 | 7136 | ||
| 7139 | #ifdef CONFIG_RT_GROUP_SCHED | ||
| 7140 | #else /* !CONFIG_RT_GROUP_SCHED */ | ||
| 7141 | #endif /* CONFIG_RT_GROUP_SCHED */ | ||
| 7142 | |||
| 7143 | #ifdef CONFIG_CGROUP_SCHED | 7137 | #ifdef CONFIG_CGROUP_SCHED |
| 7144 | /* task_group_lock serializes the addition/removal of task groups */ | 7138 | /* task_group_lock serializes the addition/removal of task groups */ |
| 7145 | static DEFINE_SPINLOCK(task_group_lock); | 7139 | static DEFINE_SPINLOCK(task_group_lock); |
| @@ -7248,9 +7242,6 @@ void sched_move_task(struct task_struct *tsk) | |||
| 7248 | } | 7242 | } |
| 7249 | #endif /* CONFIG_CGROUP_SCHED */ | 7243 | #endif /* CONFIG_CGROUP_SCHED */ |
| 7250 | 7244 | ||
| 7251 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 7252 | #endif | ||
| 7253 | |||
| 7254 | #if defined(CONFIG_RT_GROUP_SCHED) || defined(CONFIG_CFS_BANDWIDTH) | 7245 | #if defined(CONFIG_RT_GROUP_SCHED) || defined(CONFIG_CFS_BANDWIDTH) |
| 7255 | static unsigned long to_ratio(u64 period, u64 runtime) | 7246 | static unsigned long to_ratio(u64 period, u64 runtime) |
| 7256 | { | 7247 | { |
| @@ -7565,24 +7556,31 @@ cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) | |||
| 7565 | sched_destroy_group(tg); | 7556 | sched_destroy_group(tg); |
| 7566 | } | 7557 | } |
| 7567 | 7558 | ||
| 7568 | static int | 7559 | static int cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, |
| 7569 | cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | 7560 | struct cgroup_taskset *tset) |
| 7570 | { | 7561 | { |
| 7562 | struct task_struct *task; | ||
| 7563 | |||
| 7564 | cgroup_taskset_for_each(task, cgrp, tset) { | ||
| 7571 | #ifdef CONFIG_RT_GROUP_SCHED | 7565 | #ifdef CONFIG_RT_GROUP_SCHED |
| 7572 | if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk)) | 7566 | if (!sched_rt_can_attach(cgroup_tg(cgrp), task)) |
| 7573 | return -EINVAL; | 7567 | return -EINVAL; |
| 7574 | #else | 7568 | #else |
| 7575 | /* We don't support RT-tasks being in separate groups */ | 7569 | /* We don't support RT-tasks being in separate groups */ |
| 7576 | if (tsk->sched_class != &fair_sched_class) | 7570 | if (task->sched_class != &fair_sched_class) |
| 7577 | return -EINVAL; | 7571 | return -EINVAL; |
| 7578 | #endif | 7572 | #endif |
| 7573 | } | ||
| 7579 | return 0; | 7574 | return 0; |
| 7580 | } | 7575 | } |
| 7581 | 7576 | ||
| 7582 | static void | 7577 | static void cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, |
| 7583 | cpu_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | 7578 | struct cgroup_taskset *tset) |
| 7584 | { | 7579 | { |
| 7585 | sched_move_task(tsk); | 7580 | struct task_struct *task; |
| 7581 | |||
| 7582 | cgroup_taskset_for_each(task, cgrp, tset) | ||
| 7583 | sched_move_task(task); | ||
| 7586 | } | 7584 | } |
| 7587 | 7585 | ||
| 7588 | static void | 7586 | static void |
| @@ -7917,8 +7915,8 @@ struct cgroup_subsys cpu_cgroup_subsys = { | |||
| 7917 | .name = "cpu", | 7915 | .name = "cpu", |
| 7918 | .create = cpu_cgroup_create, | 7916 | .create = cpu_cgroup_create, |
| 7919 | .destroy = cpu_cgroup_destroy, | 7917 | .destroy = cpu_cgroup_destroy, |
| 7920 | .can_attach_task = cpu_cgroup_can_attach_task, | 7918 | .can_attach = cpu_cgroup_can_attach, |
| 7921 | .attach_task = cpu_cgroup_attach_task, | 7919 | .attach = cpu_cgroup_attach, |
| 7922 | .exit = cpu_cgroup_exit, | 7920 | .exit = cpu_cgroup_exit, |
| 7923 | .populate = cpu_cgroup_populate, | 7921 | .populate = cpu_cgroup_populate, |
| 7924 | .subsys_id = cpu_cgroup_subsys_id, | 7922 | .subsys_id = cpu_cgroup_subsys_id, |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 8e42de9105f8..84adb2d66cbd 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
| @@ -3130,8 +3130,10 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) | |||
| 3130 | } | 3130 | } |
| 3131 | 3131 | ||
| 3132 | #define LBF_ALL_PINNED 0x01 | 3132 | #define LBF_ALL_PINNED 0x01 |
| 3133 | #define LBF_NEED_BREAK 0x02 | 3133 | #define LBF_NEED_BREAK 0x02 /* clears into HAD_BREAK */ |
| 3134 | #define LBF_ABORT 0x04 | 3134 | #define LBF_HAD_BREAK 0x04 |
| 3135 | #define LBF_HAD_BREAKS 0x0C /* count HAD_BREAKs overflows into ABORT */ | ||
| 3136 | #define LBF_ABORT 0x10 | ||
| 3135 | 3137 | ||
| 3136 | /* | 3138 | /* |
| 3137 | * can_migrate_task - may task p from runqueue rq be migrated to this_cpu? | 3139 | * can_migrate_task - may task p from runqueue rq be migrated to this_cpu? |
| @@ -4508,7 +4510,9 @@ redo: | |||
| 4508 | goto out_balanced; | 4510 | goto out_balanced; |
| 4509 | 4511 | ||
| 4510 | if (lb_flags & LBF_NEED_BREAK) { | 4512 | if (lb_flags & LBF_NEED_BREAK) { |
| 4511 | lb_flags &= ~LBF_NEED_BREAK; | 4513 | lb_flags += LBF_HAD_BREAK - LBF_NEED_BREAK; |
| 4514 | if (lb_flags & LBF_ABORT) | ||
| 4515 | goto out_balanced; | ||
| 4512 | goto redo; | 4516 | goto redo; |
| 4513 | } | 4517 | } |
| 4514 | 4518 | ||
diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 57d4b13b631d..e8d76c5895ea 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | * This defines a simple but solid secure-computing mode. | 6 | * This defines a simple but solid secure-computing mode. |
| 7 | */ | 7 | */ |
| 8 | 8 | ||
| 9 | #include <linux/audit.h> | ||
| 9 | #include <linux/seccomp.h> | 10 | #include <linux/seccomp.h> |
| 10 | #include <linux/sched.h> | 11 | #include <linux/sched.h> |
| 11 | #include <linux/compat.h> | 12 | #include <linux/compat.h> |
| @@ -54,6 +55,7 @@ void __secure_computing(int this_syscall) | |||
| 54 | #ifdef SECCOMP_DEBUG | 55 | #ifdef SECCOMP_DEBUG |
| 55 | dump_stack(); | 56 | dump_stack(); |
| 56 | #endif | 57 | #endif |
| 58 | audit_seccomp(this_syscall); | ||
| 57 | do_exit(SIGKILL); | 59 | do_exit(SIGKILL); |
| 58 | } | 60 | } |
| 59 | 61 | ||
diff --git a/kernel/signal.c b/kernel/signal.c index 56ce3a618b28..c73c4284160e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
| @@ -28,6 +28,7 @@ | |||
| 28 | #include <linux/freezer.h> | 28 | #include <linux/freezer.h> |
| 29 | #include <linux/pid_namespace.h> | 29 | #include <linux/pid_namespace.h> |
| 30 | #include <linux/nsproxy.h> | 30 | #include <linux/nsproxy.h> |
| 31 | #include <linux/user_namespace.h> | ||
| 31 | #define CREATE_TRACE_POINTS | 32 | #define CREATE_TRACE_POINTS |
| 32 | #include <trace/events/signal.h> | 33 | #include <trace/events/signal.h> |
| 33 | 34 | ||
| @@ -1019,6 +1020,34 @@ static inline int legacy_queue(struct sigpending *signals, int sig) | |||
| 1019 | return (sig < SIGRTMIN) && sigismember(&signals->signal, sig); | 1020 | return (sig < SIGRTMIN) && sigismember(&signals->signal, sig); |
| 1020 | } | 1021 | } |
| 1021 | 1022 | ||
| 1023 | /* | ||
| 1024 | * map the uid in struct cred into user namespace *ns | ||
| 1025 | */ | ||
| 1026 | static inline uid_t map_cred_ns(const struct cred *cred, | ||
| 1027 | struct user_namespace *ns) | ||
| 1028 | { | ||
| 1029 | return user_ns_map_uid(ns, cred, cred->uid); | ||
| 1030 | } | ||
| 1031 | |||
| 1032 | #ifdef CONFIG_USER_NS | ||
| 1033 | static inline void userns_fixup_signal_uid(struct siginfo *info, struct task_struct *t) | ||
| 1034 | { | ||
| 1035 | if (current_user_ns() == task_cred_xxx(t, user_ns)) | ||
| 1036 | return; | ||
| 1037 | |||
| 1038 | if (SI_FROMKERNEL(info)) | ||
| 1039 | return; | ||
| 1040 | |||
| 1041 | info->si_uid = user_ns_map_uid(task_cred_xxx(t, user_ns), | ||
| 1042 | current_cred(), info->si_uid); | ||
| 1043 | } | ||
| 1044 | #else | ||
| 1045 | static inline void userns_fixup_signal_uid(struct siginfo *info, struct task_struct *t) | ||
| 1046 | { | ||
| 1047 | return; | ||
| 1048 | } | ||
| 1049 | #endif | ||
| 1050 | |||
| 1022 | static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, | 1051 | static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, |
| 1023 | int group, int from_ancestor_ns) | 1052 | int group, int from_ancestor_ns) |
| 1024 | { | 1053 | { |
| @@ -1088,6 +1117,9 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, | |||
| 1088 | q->info.si_pid = 0; | 1117 | q->info.si_pid = 0; |
| 1089 | break; | 1118 | break; |
| 1090 | } | 1119 | } |
| 1120 | |||
| 1121 | userns_fixup_signal_uid(&q->info, t); | ||
| 1122 | |||
| 1091 | } else if (!is_si_special(info)) { | 1123 | } else if (!is_si_special(info)) { |
| 1092 | if (sig >= SIGRTMIN && info->si_code != SI_USER) { | 1124 | if (sig >= SIGRTMIN && info->si_code != SI_USER) { |
| 1093 | /* | 1125 | /* |
| @@ -1626,7 +1658,8 @@ bool do_notify_parent(struct task_struct *tsk, int sig) | |||
| 1626 | */ | 1658 | */ |
| 1627 | rcu_read_lock(); | 1659 | rcu_read_lock(); |
| 1628 | info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns); | 1660 | info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns); |
| 1629 | info.si_uid = __task_cred(tsk)->uid; | 1661 | info.si_uid = map_cred_ns(__task_cred(tsk), |
| 1662 | task_cred_xxx(tsk->parent, user_ns)); | ||
| 1630 | rcu_read_unlock(); | 1663 | rcu_read_unlock(); |
| 1631 | 1664 | ||
| 1632 | info.si_utime = cputime_to_clock_t(tsk->utime + tsk->signal->utime); | 1665 | info.si_utime = cputime_to_clock_t(tsk->utime + tsk->signal->utime); |
| @@ -1709,7 +1742,8 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, | |||
| 1709 | */ | 1742 | */ |
| 1710 | rcu_read_lock(); | 1743 | rcu_read_lock(); |
| 1711 | info.si_pid = task_pid_nr_ns(tsk, parent->nsproxy->pid_ns); | 1744 | info.si_pid = task_pid_nr_ns(tsk, parent->nsproxy->pid_ns); |
| 1712 | info.si_uid = __task_cred(tsk)->uid; | 1745 | info.si_uid = map_cred_ns(__task_cred(tsk), |
| 1746 | task_cred_xxx(parent, user_ns)); | ||
| 1713 | rcu_read_unlock(); | 1747 | rcu_read_unlock(); |
| 1714 | 1748 | ||
| 1715 | info.si_utime = cputime_to_clock_t(tsk->utime); | 1749 | info.si_utime = cputime_to_clock_t(tsk->utime); |
| @@ -2125,8 +2159,11 @@ static int ptrace_signal(int signr, siginfo_t *info, | |||
| 2125 | info->si_signo = signr; | 2159 | info->si_signo = signr; |
| 2126 | info->si_errno = 0; | 2160 | info->si_errno = 0; |
| 2127 | info->si_code = SI_USER; | 2161 | info->si_code = SI_USER; |
| 2162 | rcu_read_lock(); | ||
| 2128 | info->si_pid = task_pid_vnr(current->parent); | 2163 | info->si_pid = task_pid_vnr(current->parent); |
| 2129 | info->si_uid = task_uid(current->parent); | 2164 | info->si_uid = map_cred_ns(__task_cred(current->parent), |
| 2165 | current_user_ns()); | ||
| 2166 | rcu_read_unlock(); | ||
| 2130 | } | 2167 | } |
| 2131 | 2168 | ||
| 2132 | /* If the (new) signal is now blocked, requeue it. */ | 2169 | /* If the (new) signal is now blocked, requeue it. */ |
| @@ -2318,6 +2355,27 @@ relock: | |||
| 2318 | return signr; | 2355 | return signr; |
| 2319 | } | 2356 | } |
| 2320 | 2357 | ||
| 2358 | /** | ||
| 2359 | * block_sigmask - add @ka's signal mask to current->blocked | ||
| 2360 | * @ka: action for @signr | ||
| 2361 | * @signr: signal that has been successfully delivered | ||
| 2362 | * | ||
| 2363 | * This function should be called when a signal has successfully been | ||
| 2364 | * delivered. It adds the mask of signals for @ka to current->blocked | ||
| 2365 | * so that they are blocked during the execution of the signal | ||
| 2366 | * handler. In addition, @signr will be blocked unless %SA_NODEFER is | ||
| 2367 | * set in @ka->sa.sa_flags. | ||
| 2368 | */ | ||
| 2369 | void block_sigmask(struct k_sigaction *ka, int signr) | ||
| 2370 | { | ||
| 2371 | sigset_t blocked; | ||
| 2372 | |||
| 2373 | sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask); | ||
| 2374 | if (!(ka->sa.sa_flags & SA_NODEFER)) | ||
| 2375 | sigaddset(&blocked, signr); | ||
| 2376 | set_current_blocked(&blocked); | ||
| 2377 | } | ||
| 2378 | |||
| 2321 | /* | 2379 | /* |
| 2322 | * It could be that complete_signal() picked us to notify about the | 2380 | * It could be that complete_signal() picked us to notify about the |
| 2323 | * group-wide signal. Other threads should be notified now to take | 2381 | * group-wide signal. Other threads should be notified now to take |
| @@ -2355,8 +2413,15 @@ void exit_signals(struct task_struct *tsk) | |||
| 2355 | int group_stop = 0; | 2413 | int group_stop = 0; |
| 2356 | sigset_t unblocked; | 2414 | sigset_t unblocked; |
| 2357 | 2415 | ||
| 2416 | /* | ||
| 2417 | * @tsk is about to have PF_EXITING set - lock out users which | ||
| 2418 | * expect stable threadgroup. | ||
| 2419 | */ | ||
| 2420 | threadgroup_change_begin(tsk); | ||
| 2421 | |||
| 2358 | if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) { | 2422 | if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) { |
| 2359 | tsk->flags |= PF_EXITING; | 2423 | tsk->flags |= PF_EXITING; |
| 2424 | threadgroup_change_end(tsk); | ||
| 2360 | return; | 2425 | return; |
| 2361 | } | 2426 | } |
| 2362 | 2427 | ||
| @@ -2366,6 +2431,9 @@ void exit_signals(struct task_struct *tsk) | |||
| 2366 | * see wants_signal(), do_signal_stop(). | 2431 | * see wants_signal(), do_signal_stop(). |
| 2367 | */ | 2432 | */ |
| 2368 | tsk->flags |= PF_EXITING; | 2433 | tsk->flags |= PF_EXITING; |
| 2434 | |||
| 2435 | threadgroup_change_end(tsk); | ||
| 2436 | |||
| 2369 | if (!signal_pending(tsk)) | 2437 | if (!signal_pending(tsk)) |
| 2370 | goto out; | 2438 | goto out; |
| 2371 | 2439 | ||
diff --git a/kernel/sys.c b/kernel/sys.c index ddf8155bf3f8..40701538fbd1 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
| @@ -1692,6 +1692,124 @@ SYSCALL_DEFINE1(umask, int, mask) | |||
| 1692 | return mask; | 1692 | return mask; |
| 1693 | } | 1693 | } |
| 1694 | 1694 | ||
| 1695 | #ifdef CONFIG_CHECKPOINT_RESTORE | ||
| 1696 | static int prctl_set_mm(int opt, unsigned long addr, | ||
| 1697 | unsigned long arg4, unsigned long arg5) | ||
| 1698 | { | ||
| 1699 | unsigned long rlim = rlimit(RLIMIT_DATA); | ||
| 1700 | unsigned long vm_req_flags; | ||
| 1701 | unsigned long vm_bad_flags; | ||
| 1702 | struct vm_area_struct *vma; | ||
| 1703 | int error = 0; | ||
| 1704 | struct mm_struct *mm = current->mm; | ||
| 1705 | |||
| 1706 | if (arg4 | arg5) | ||
| 1707 | return -EINVAL; | ||
| 1708 | |||
| 1709 | if (!capable(CAP_SYS_ADMIN)) | ||
| 1710 | return -EPERM; | ||
| 1711 | |||
| 1712 | if (addr >= TASK_SIZE) | ||
| 1713 | return -EINVAL; | ||
| 1714 | |||
| 1715 | down_read(&mm->mmap_sem); | ||
| 1716 | vma = find_vma(mm, addr); | ||
| 1717 | |||
| 1718 | if (opt != PR_SET_MM_START_BRK && opt != PR_SET_MM_BRK) { | ||
| 1719 | /* It must be existing VMA */ | ||
| 1720 | if (!vma || vma->vm_start > addr) | ||
| 1721 | goto out; | ||
| 1722 | } | ||
| 1723 | |||
| 1724 | error = -EINVAL; | ||
| 1725 | switch (opt) { | ||
| 1726 | case PR_SET_MM_START_CODE: | ||
| 1727 | case PR_SET_MM_END_CODE: | ||
| 1728 | vm_req_flags = VM_READ | VM_EXEC; | ||
| 1729 | vm_bad_flags = VM_WRITE | VM_MAYSHARE; | ||
| 1730 | |||
| 1731 | if ((vma->vm_flags & vm_req_flags) != vm_req_flags || | ||
| 1732 | (vma->vm_flags & vm_bad_flags)) | ||
| 1733 | goto out; | ||
| 1734 | |||
| 1735 | if (opt == PR_SET_MM_START_CODE) | ||
| 1736 | mm->start_code = addr; | ||
| 1737 | else | ||
| 1738 | mm->end_code = addr; | ||
| 1739 | break; | ||
| 1740 | |||
| 1741 | case PR_SET_MM_START_DATA: | ||
| 1742 | case PR_SET_MM_END_DATA: | ||
| 1743 | vm_req_flags = VM_READ | VM_WRITE; | ||
| 1744 | vm_bad_flags = VM_EXEC | VM_MAYSHARE; | ||
| 1745 | |||
| 1746 | if ((vma->vm_flags & vm_req_flags) != vm_req_flags || | ||
| 1747 | (vma->vm_flags & vm_bad_flags)) | ||
| 1748 | goto out; | ||
| 1749 | |||
| 1750 | if (opt == PR_SET_MM_START_DATA) | ||
| 1751 | mm->start_data = addr; | ||
| 1752 | else | ||
| 1753 | mm->end_data = addr; | ||
| 1754 | break; | ||
| 1755 | |||
| 1756 | case PR_SET_MM_START_STACK: | ||
| 1757 | |||
| 1758 | #ifdef CONFIG_STACK_GROWSUP | ||
| 1759 | vm_req_flags = VM_READ | VM_WRITE | VM_GROWSUP; | ||
| 1760 | #else | ||
| 1761 | vm_req_flags = VM_READ | VM_WRITE | VM_GROWSDOWN; | ||
| 1762 | #endif | ||
| 1763 | if ((vma->vm_flags & vm_req_flags) != vm_req_flags) | ||
| 1764 | goto out; | ||
| 1765 | |||
| 1766 | mm->start_stack = addr; | ||
| 1767 | break; | ||
| 1768 | |||
| 1769 | case PR_SET_MM_START_BRK: | ||
| 1770 | if (addr <= mm->end_data) | ||
| 1771 | goto out; | ||
| 1772 | |||
| 1773 | if (rlim < RLIM_INFINITY && | ||
| 1774 | (mm->brk - addr) + | ||
| 1775 | (mm->end_data - mm->start_data) > rlim) | ||
| 1776 | goto out; | ||
| 1777 | |||
| 1778 | mm->start_brk = addr; | ||
| 1779 | break; | ||
| 1780 | |||
| 1781 | case PR_SET_MM_BRK: | ||
| 1782 | if (addr <= mm->end_data) | ||
| 1783 | goto out; | ||
| 1784 | |||
| 1785 | if (rlim < RLIM_INFINITY && | ||
| 1786 | (addr - mm->start_brk) + | ||
| 1787 | (mm->end_data - mm->start_data) > rlim) | ||
| 1788 | goto out; | ||
| 1789 | |||
| 1790 | mm->brk = addr; | ||
| 1791 | break; | ||
| 1792 | |||
| 1793 | default: | ||
| 1794 | error = -EINVAL; | ||
| 1795 | goto out; | ||
| 1796 | } | ||
| 1797 | |||
| 1798 | error = 0; | ||
| 1799 | |||
| 1800 | out: | ||
| 1801 | up_read(&mm->mmap_sem); | ||
| 1802 | |||
| 1803 | return error; | ||
| 1804 | } | ||
| 1805 | #else /* CONFIG_CHECKPOINT_RESTORE */ | ||
| 1806 | static int prctl_set_mm(int opt, unsigned long addr, | ||
| 1807 | unsigned long arg4, unsigned long arg5) | ||
| 1808 | { | ||
| 1809 | return -EINVAL; | ||
| 1810 | } | ||
| 1811 | #endif | ||
| 1812 | |||
| 1695 | SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, | 1813 | SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, |
| 1696 | unsigned long, arg4, unsigned long, arg5) | 1814 | unsigned long, arg4, unsigned long, arg5) |
| 1697 | { | 1815 | { |
| @@ -1841,6 +1959,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, | |||
| 1841 | else | 1959 | else |
| 1842 | error = PR_MCE_KILL_DEFAULT; | 1960 | error = PR_MCE_KILL_DEFAULT; |
| 1843 | break; | 1961 | break; |
| 1962 | case PR_SET_MM: | ||
| 1963 | error = prctl_set_mm(arg2, arg3, arg4, arg5); | ||
| 1964 | break; | ||
| 1844 | default: | 1965 | default: |
| 1845 | error = -EINVAL; | 1966 | error = -EINVAL; |
| 1846 | break; | 1967 | break; |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ae2719643854..f487f257e05e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -803,6 +803,15 @@ static struct ctl_table kern_table[] = { | |||
| 803 | .mode = 0644, | 803 | .mode = 0644, |
| 804 | .proc_handler = proc_dointvec, | 804 | .proc_handler = proc_dointvec, |
| 805 | }, | 805 | }, |
| 806 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | ||
| 807 | { | ||
| 808 | .procname = "panic_on_stackoverflow", | ||
| 809 | .data = &sysctl_panic_on_stackoverflow, | ||
| 810 | .maxlen = sizeof(int), | ||
| 811 | .mode = 0644, | ||
| 812 | .proc_handler = proc_dointvec, | ||
| 813 | }, | ||
| 814 | #endif | ||
| 806 | { | 815 | { |
| 807 | .procname = "bootloader_type", | 816 | .procname = "bootloader_type", |
| 808 | .data = &bootloader_type, | 817 | .data = &bootloader_type, |
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index b26c2228fe92..2cf9cc7aa103 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig | |||
| @@ -25,7 +25,7 @@ config HIGH_RES_TIMERS | |||
| 25 | config GENERIC_CLOCKEVENTS_BUILD | 25 | config GENERIC_CLOCKEVENTS_BUILD |
| 26 | bool | 26 | bool |
| 27 | default y | 27 | default y |
| 28 | depends on GENERIC_CLOCKEVENTS || GENERIC_CLOCKEVENTS_MIGR | 28 | depends on GENERIC_CLOCKEVENTS |
| 29 | 29 | ||
| 30 | config GENERIC_CLOCKEVENTS_MIN_ADJUST | 30 | config GENERIC_CLOCKEVENTS_MIN_ADJUST |
| 31 | bool | 31 | bool |
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 1ecd6ba36d6c..9cd928f7a7c6 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c | |||
| @@ -17,7 +17,6 @@ | |||
| 17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
| 18 | #include <linux/notifier.h> | 18 | #include <linux/notifier.h> |
| 19 | #include <linux/smp.h> | 19 | #include <linux/smp.h> |
| 20 | #include <linux/sysdev.h> | ||
| 21 | 20 | ||
| 22 | #include "tick-internal.h" | 21 | #include "tick-internal.h" |
| 23 | 22 | ||
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index d3ad022136e5..a45ca167ab24 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
| @@ -23,8 +23,8 @@ | |||
| 23 | * o Allow clocksource drivers to be unregistered | 23 | * o Allow clocksource drivers to be unregistered |
| 24 | */ | 24 | */ |
| 25 | 25 | ||
| 26 | #include <linux/device.h> | ||
| 26 | #include <linux/clocksource.h> | 27 | #include <linux/clocksource.h> |
| 27 | #include <linux/sysdev.h> | ||
| 28 | #include <linux/init.h> | 28 | #include <linux/init.h> |
| 29 | #include <linux/module.h> | 29 | #include <linux/module.h> |
| 30 | #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */ | 30 | #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */ |
| @@ -796,8 +796,8 @@ EXPORT_SYMBOL(clocksource_unregister); | |||
| 796 | * Provides sysfs interface for listing current clocksource. | 796 | * Provides sysfs interface for listing current clocksource. |
| 797 | */ | 797 | */ |
| 798 | static ssize_t | 798 | static ssize_t |
| 799 | sysfs_show_current_clocksources(struct sys_device *dev, | 799 | sysfs_show_current_clocksources(struct device *dev, |
| 800 | struct sysdev_attribute *attr, char *buf) | 800 | struct device_attribute *attr, char *buf) |
| 801 | { | 801 | { |
| 802 | ssize_t count = 0; | 802 | ssize_t count = 0; |
| 803 | 803 | ||
| @@ -818,8 +818,8 @@ sysfs_show_current_clocksources(struct sys_device *dev, | |||
| 818 | * Takes input from sysfs interface for manually overriding the default | 818 | * Takes input from sysfs interface for manually overriding the default |
| 819 | * clocksource selection. | 819 | * clocksource selection. |
| 820 | */ | 820 | */ |
| 821 | static ssize_t sysfs_override_clocksource(struct sys_device *dev, | 821 | static ssize_t sysfs_override_clocksource(struct device *dev, |
| 822 | struct sysdev_attribute *attr, | 822 | struct device_attribute *attr, |
| 823 | const char *buf, size_t count) | 823 | const char *buf, size_t count) |
| 824 | { | 824 | { |
| 825 | size_t ret = count; | 825 | size_t ret = count; |
| @@ -853,8 +853,8 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, | |||
| 853 | * Provides sysfs interface for listing registered clocksources | 853 | * Provides sysfs interface for listing registered clocksources |
| 854 | */ | 854 | */ |
| 855 | static ssize_t | 855 | static ssize_t |
| 856 | sysfs_show_available_clocksources(struct sys_device *dev, | 856 | sysfs_show_available_clocksources(struct device *dev, |
| 857 | struct sysdev_attribute *attr, | 857 | struct device_attribute *attr, |
| 858 | char *buf) | 858 | char *buf) |
| 859 | { | 859 | { |
| 860 | struct clocksource *src; | 860 | struct clocksource *src; |
| @@ -883,35 +883,36 @@ sysfs_show_available_clocksources(struct sys_device *dev, | |||
| 883 | /* | 883 | /* |
| 884 | * Sysfs setup bits: | 884 | * Sysfs setup bits: |
| 885 | */ | 885 | */ |
| 886 | static SYSDEV_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources, | 886 | static DEVICE_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources, |
| 887 | sysfs_override_clocksource); | 887 | sysfs_override_clocksource); |
| 888 | 888 | ||
| 889 | static SYSDEV_ATTR(available_clocksource, 0444, | 889 | static DEVICE_ATTR(available_clocksource, 0444, |
| 890 | sysfs_show_available_clocksources, NULL); | 890 | sysfs_show_available_clocksources, NULL); |
| 891 | 891 | ||
| 892 | static struct sysdev_class clocksource_sysclass = { | 892 | static struct bus_type clocksource_subsys = { |
| 893 | .name = "clocksource", | 893 | .name = "clocksource", |
| 894 | .dev_name = "clocksource", | ||
| 894 | }; | 895 | }; |
| 895 | 896 | ||
| 896 | static struct sys_device device_clocksource = { | 897 | static struct device device_clocksource = { |
| 897 | .id = 0, | 898 | .id = 0, |
| 898 | .cls = &clocksource_sysclass, | 899 | .bus = &clocksource_subsys, |
| 899 | }; | 900 | }; |
| 900 | 901 | ||
| 901 | static int __init init_clocksource_sysfs(void) | 902 | static int __init init_clocksource_sysfs(void) |
| 902 | { | 903 | { |
| 903 | int error = sysdev_class_register(&clocksource_sysclass); | 904 | int error = subsys_system_register(&clocksource_subsys, NULL); |
| 904 | 905 | ||
| 905 | if (!error) | 906 | if (!error) |
| 906 | error = sysdev_register(&device_clocksource); | 907 | error = device_register(&device_clocksource); |
| 907 | if (!error) | 908 | if (!error) |
| 908 | error = sysdev_create_file( | 909 | error = device_create_file( |
| 909 | &device_clocksource, | 910 | &device_clocksource, |
| 910 | &attr_current_clocksource); | 911 | &dev_attr_current_clocksource); |
| 911 | if (!error) | 912 | if (!error) |
| 912 | error = sysdev_create_file( | 913 | error = device_create_file( |
| 913 | &device_clocksource, | 914 | &device_clocksource, |
| 914 | &attr_available_clocksource); | 915 | &dev_attr_available_clocksource); |
| 915 | return error; | 916 | return error; |
| 916 | } | 917 | } |
| 917 | 918 | ||
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 16fc34a0806f..cdea7b56b0c9 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
| @@ -402,7 +402,7 @@ static int blk_remove_buf_file_callback(struct dentry *dentry) | |||
| 402 | 402 | ||
| 403 | static struct dentry *blk_create_buf_file_callback(const char *filename, | 403 | static struct dentry *blk_create_buf_file_callback(const char *filename, |
| 404 | struct dentry *parent, | 404 | struct dentry *parent, |
| 405 | int mode, | 405 | umode_t mode, |
| 406 | struct rchan_buf *buf, | 406 | struct rchan_buf *buf, |
| 407 | int *is_global) | 407 | int *is_global) |
| 408 | { | 408 | { |
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b1e8943fed1d..683d559a0eef 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
| @@ -22,11 +22,13 @@ | |||
| 22 | #include <linux/hardirq.h> | 22 | #include <linux/hardirq.h> |
| 23 | #include <linux/kthread.h> | 23 | #include <linux/kthread.h> |
| 24 | #include <linux/uaccess.h> | 24 | #include <linux/uaccess.h> |
| 25 | #include <linux/bsearch.h> | ||
| 25 | #include <linux/module.h> | 26 | #include <linux/module.h> |
| 26 | #include <linux/ftrace.h> | 27 | #include <linux/ftrace.h> |
| 27 | #include <linux/sysctl.h> | 28 | #include <linux/sysctl.h> |
| 28 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
| 29 | #include <linux/ctype.h> | 30 | #include <linux/ctype.h> |
| 31 | #include <linux/sort.h> | ||
| 30 | #include <linux/list.h> | 32 | #include <linux/list.h> |
| 31 | #include <linux/hash.h> | 33 | #include <linux/hash.h> |
| 32 | #include <linux/rcupdate.h> | 34 | #include <linux/rcupdate.h> |
| @@ -947,13 +949,6 @@ struct ftrace_func_probe { | |||
| 947 | struct rcu_head rcu; | 949 | struct rcu_head rcu; |
| 948 | }; | 950 | }; |
| 949 | 951 | ||
| 950 | enum { | ||
| 951 | FTRACE_ENABLE_CALLS = (1 << 0), | ||
| 952 | FTRACE_DISABLE_CALLS = (1 << 1), | ||
| 953 | FTRACE_UPDATE_TRACE_FUNC = (1 << 2), | ||
| 954 | FTRACE_START_FUNC_RET = (1 << 3), | ||
| 955 | FTRACE_STOP_FUNC_RET = (1 << 4), | ||
| 956 | }; | ||
| 957 | struct ftrace_func_entry { | 952 | struct ftrace_func_entry { |
| 958 | struct hlist_node hlist; | 953 | struct hlist_node hlist; |
| 959 | unsigned long ip; | 954 | unsigned long ip; |
| @@ -984,18 +979,19 @@ static struct ftrace_ops global_ops = { | |||
| 984 | .filter_hash = EMPTY_HASH, | 979 | .filter_hash = EMPTY_HASH, |
| 985 | }; | 980 | }; |
| 986 | 981 | ||
| 987 | static struct dyn_ftrace *ftrace_new_addrs; | ||
| 988 | |||
| 989 | static DEFINE_MUTEX(ftrace_regex_lock); | 982 | static DEFINE_MUTEX(ftrace_regex_lock); |
| 990 | 983 | ||
| 991 | struct ftrace_page { | 984 | struct ftrace_page { |
| 992 | struct ftrace_page *next; | 985 | struct ftrace_page *next; |
| 986 | struct dyn_ftrace *records; | ||
| 993 | int index; | 987 | int index; |
| 994 | struct dyn_ftrace records[]; | 988 | int size; |
| 995 | }; | 989 | }; |
| 996 | 990 | ||
| 997 | #define ENTRIES_PER_PAGE \ | 991 | static struct ftrace_page *ftrace_new_pgs; |
| 998 | ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace)) | 992 | |
| 993 | #define ENTRY_SIZE sizeof(struct dyn_ftrace) | ||
| 994 | #define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE) | ||
| 999 | 995 | ||
| 1000 | /* estimate from running different kernels */ | 996 | /* estimate from running different kernels */ |
| 1001 | #define NR_TO_INIT 10000 | 997 | #define NR_TO_INIT 10000 |
| @@ -1003,7 +999,10 @@ struct ftrace_page { | |||
| 1003 | static struct ftrace_page *ftrace_pages_start; | 999 | static struct ftrace_page *ftrace_pages_start; |
| 1004 | static struct ftrace_page *ftrace_pages; | 1000 | static struct ftrace_page *ftrace_pages; |
| 1005 | 1001 | ||
| 1006 | static struct dyn_ftrace *ftrace_free_records; | 1002 | static bool ftrace_hash_empty(struct ftrace_hash *hash) |
| 1003 | { | ||
| 1004 | return !hash || !hash->count; | ||
| 1005 | } | ||
| 1007 | 1006 | ||
| 1008 | static struct ftrace_func_entry * | 1007 | static struct ftrace_func_entry * |
| 1009 | ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) | 1008 | ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) |
| @@ -1013,7 +1012,7 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) | |||
| 1013 | struct hlist_head *hhd; | 1012 | struct hlist_head *hhd; |
| 1014 | struct hlist_node *n; | 1013 | struct hlist_node *n; |
| 1015 | 1014 | ||
| 1016 | if (!hash->count) | 1015 | if (ftrace_hash_empty(hash)) |
| 1017 | return NULL; | 1016 | return NULL; |
| 1018 | 1017 | ||
| 1019 | if (hash->size_bits > 0) | 1018 | if (hash->size_bits > 0) |
| @@ -1157,7 +1156,7 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash) | |||
| 1157 | return NULL; | 1156 | return NULL; |
| 1158 | 1157 | ||
| 1159 | /* Empty hash? */ | 1158 | /* Empty hash? */ |
| 1160 | if (!hash || !hash->count) | 1159 | if (ftrace_hash_empty(hash)) |
| 1161 | return new_hash; | 1160 | return new_hash; |
| 1162 | 1161 | ||
| 1163 | size = 1 << hash->size_bits; | 1162 | size = 1 << hash->size_bits; |
| @@ -1282,9 +1281,9 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) | |||
| 1282 | filter_hash = rcu_dereference_raw(ops->filter_hash); | 1281 | filter_hash = rcu_dereference_raw(ops->filter_hash); |
| 1283 | notrace_hash = rcu_dereference_raw(ops->notrace_hash); | 1282 | notrace_hash = rcu_dereference_raw(ops->notrace_hash); |
| 1284 | 1283 | ||
| 1285 | if ((!filter_hash || !filter_hash->count || | 1284 | if ((ftrace_hash_empty(filter_hash) || |
| 1286 | ftrace_lookup_ip(filter_hash, ip)) && | 1285 | ftrace_lookup_ip(filter_hash, ip)) && |
| 1287 | (!notrace_hash || !notrace_hash->count || | 1286 | (ftrace_hash_empty(notrace_hash) || |
| 1288 | !ftrace_lookup_ip(notrace_hash, ip))) | 1287 | !ftrace_lookup_ip(notrace_hash, ip))) |
| 1289 | ret = 1; | 1288 | ret = 1; |
| 1290 | else | 1289 | else |
| @@ -1307,6 +1306,47 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) | |||
| 1307 | } \ | 1306 | } \ |
| 1308 | } | 1307 | } |
| 1309 | 1308 | ||
| 1309 | |||
| 1310 | static int ftrace_cmp_recs(const void *a, const void *b) | ||
| 1311 | { | ||
| 1312 | const struct dyn_ftrace *reca = a; | ||
| 1313 | const struct dyn_ftrace *recb = b; | ||
| 1314 | |||
| 1315 | if (reca->ip > recb->ip) | ||
| 1316 | return 1; | ||
| 1317 | if (reca->ip < recb->ip) | ||
| 1318 | return -1; | ||
| 1319 | return 0; | ||
| 1320 | } | ||
| 1321 | |||
| 1322 | /** | ||
| 1323 | * ftrace_location - return true if the ip given is a traced location | ||
| 1324 | * @ip: the instruction pointer to check | ||
| 1325 | * | ||
| 1326 | * Returns 1 if @ip given is a pointer to a ftrace location. | ||
| 1327 | * That is, the instruction that is either a NOP or call to | ||
| 1328 | * the function tracer. It checks the ftrace internal tables to | ||
| 1329 | * determine if the address belongs or not. | ||
| 1330 | */ | ||
| 1331 | int ftrace_location(unsigned long ip) | ||
| 1332 | { | ||
| 1333 | struct ftrace_page *pg; | ||
| 1334 | struct dyn_ftrace *rec; | ||
| 1335 | struct dyn_ftrace key; | ||
| 1336 | |||
| 1337 | key.ip = ip; | ||
| 1338 | |||
| 1339 | for (pg = ftrace_pages_start; pg; pg = pg->next) { | ||
| 1340 | rec = bsearch(&key, pg->records, pg->index, | ||
| 1341 | sizeof(struct dyn_ftrace), | ||
| 1342 | ftrace_cmp_recs); | ||
| 1343 | if (rec) | ||
| 1344 | return 1; | ||
| 1345 | } | ||
| 1346 | |||
| 1347 | return 0; | ||
| 1348 | } | ||
| 1349 | |||
| 1310 | static void __ftrace_hash_rec_update(struct ftrace_ops *ops, | 1350 | static void __ftrace_hash_rec_update(struct ftrace_ops *ops, |
| 1311 | int filter_hash, | 1351 | int filter_hash, |
| 1312 | bool inc) | 1352 | bool inc) |
| @@ -1336,7 +1376,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, | |||
| 1336 | if (filter_hash) { | 1376 | if (filter_hash) { |
| 1337 | hash = ops->filter_hash; | 1377 | hash = ops->filter_hash; |
| 1338 | other_hash = ops->notrace_hash; | 1378 | other_hash = ops->notrace_hash; |
| 1339 | if (!hash || !hash->count) | 1379 | if (ftrace_hash_empty(hash)) |
| 1340 | all = 1; | 1380 | all = 1; |
| 1341 | } else { | 1381 | } else { |
| 1342 | inc = !inc; | 1382 | inc = !inc; |
| @@ -1346,7 +1386,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, | |||
| 1346 | * If the notrace hash has no items, | 1386 | * If the notrace hash has no items, |
| 1347 | * then there's nothing to do. | 1387 | * then there's nothing to do. |
| 1348 | */ | 1388 | */ |
| 1349 | if (hash && !hash->count) | 1389 | if (ftrace_hash_empty(hash)) |
| 1350 | return; | 1390 | return; |
| 1351 | } | 1391 | } |
| 1352 | 1392 | ||
| @@ -1363,8 +1403,8 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, | |||
| 1363 | if (!other_hash || !ftrace_lookup_ip(other_hash, rec->ip)) | 1403 | if (!other_hash || !ftrace_lookup_ip(other_hash, rec->ip)) |
| 1364 | match = 1; | 1404 | match = 1; |
| 1365 | } else { | 1405 | } else { |
| 1366 | in_hash = hash && !!ftrace_lookup_ip(hash, rec->ip); | 1406 | in_hash = !!ftrace_lookup_ip(hash, rec->ip); |
| 1367 | in_other_hash = other_hash && !!ftrace_lookup_ip(other_hash, rec->ip); | 1407 | in_other_hash = !!ftrace_lookup_ip(other_hash, rec->ip); |
| 1368 | 1408 | ||
| 1369 | /* | 1409 | /* |
| 1370 | * | 1410 | * |
| @@ -1372,7 +1412,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, | |||
| 1372 | if (filter_hash && in_hash && !in_other_hash) | 1412 | if (filter_hash && in_hash && !in_other_hash) |
| 1373 | match = 1; | 1413 | match = 1; |
| 1374 | else if (!filter_hash && in_hash && | 1414 | else if (!filter_hash && in_hash && |
| 1375 | (in_other_hash || !other_hash->count)) | 1415 | (in_other_hash || ftrace_hash_empty(other_hash))) |
| 1376 | match = 1; | 1416 | match = 1; |
| 1377 | } | 1417 | } |
| 1378 | if (!match) | 1418 | if (!match) |
| @@ -1406,40 +1446,12 @@ static void ftrace_hash_rec_enable(struct ftrace_ops *ops, | |||
| 1406 | __ftrace_hash_rec_update(ops, filter_hash, 1); | 1446 | __ftrace_hash_rec_update(ops, filter_hash, 1); |
| 1407 | } | 1447 | } |
| 1408 | 1448 | ||
| 1409 | static void ftrace_free_rec(struct dyn_ftrace *rec) | ||
| 1410 | { | ||
| 1411 | rec->freelist = ftrace_free_records; | ||
| 1412 | ftrace_free_records = rec; | ||
| 1413 | rec->flags |= FTRACE_FL_FREE; | ||
| 1414 | } | ||
| 1415 | |||
| 1416 | static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) | 1449 | static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) |
| 1417 | { | 1450 | { |
| 1418 | struct dyn_ftrace *rec; | 1451 | if (ftrace_pages->index == ftrace_pages->size) { |
| 1419 | 1452 | /* We should have allocated enough */ | |
| 1420 | /* First check for freed records */ | 1453 | if (WARN_ON(!ftrace_pages->next)) |
| 1421 | if (ftrace_free_records) { | ||
| 1422 | rec = ftrace_free_records; | ||
| 1423 | |||
| 1424 | if (unlikely(!(rec->flags & FTRACE_FL_FREE))) { | ||
| 1425 | FTRACE_WARN_ON_ONCE(1); | ||
| 1426 | ftrace_free_records = NULL; | ||
| 1427 | return NULL; | 1454 | return NULL; |
| 1428 | } | ||
| 1429 | |||
| 1430 | ftrace_free_records = rec->freelist; | ||
| 1431 | memset(rec, 0, sizeof(*rec)); | ||
| 1432 | return rec; | ||
| 1433 | } | ||
| 1434 | |||
| 1435 | if (ftrace_pages->index == ENTRIES_PER_PAGE) { | ||
| 1436 | if (!ftrace_pages->next) { | ||
| 1437 | /* allocate another page */ | ||
| 1438 | ftrace_pages->next = | ||
| 1439 | (void *)get_zeroed_page(GFP_KERNEL); | ||
| 1440 | if (!ftrace_pages->next) | ||
| 1441 | return NULL; | ||
| 1442 | } | ||
| 1443 | ftrace_pages = ftrace_pages->next; | 1455 | ftrace_pages = ftrace_pages->next; |
| 1444 | } | 1456 | } |
| 1445 | 1457 | ||
| @@ -1459,8 +1471,6 @@ ftrace_record_ip(unsigned long ip) | |||
| 1459 | return NULL; | 1471 | return NULL; |
| 1460 | 1472 | ||
| 1461 | rec->ip = ip; | 1473 | rec->ip = ip; |
| 1462 | rec->newlist = ftrace_new_addrs; | ||
| 1463 | ftrace_new_addrs = rec; | ||
| 1464 | 1474 | ||
| 1465 | return rec; | 1475 | return rec; |
| 1466 | } | 1476 | } |
| @@ -1475,7 +1485,19 @@ static void print_ip_ins(const char *fmt, unsigned char *p) | |||
| 1475 | printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]); | 1485 | printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]); |
| 1476 | } | 1486 | } |
| 1477 | 1487 | ||
| 1478 | static void ftrace_bug(int failed, unsigned long ip) | 1488 | /** |
| 1489 | * ftrace_bug - report and shutdown function tracer | ||
| 1490 | * @failed: The failed type (EFAULT, EINVAL, EPERM) | ||
| 1491 | * @ip: The address that failed | ||
| 1492 | * | ||
| 1493 | * The arch code that enables or disables the function tracing | ||
| 1494 | * can call ftrace_bug() when it has detected a problem in | ||
| 1495 | * modifying the code. @failed should be one of either: | ||
| 1496 | * EFAULT - if the problem happens on reading the @ip address | ||
| 1497 | * EINVAL - if what is read at @ip is not what was expected | ||
| 1499 | * EPERM - if the problem happens on writing to the @ip address | ||
| 1499 | */ | ||
| 1500 | void ftrace_bug(int failed, unsigned long ip) | ||
| 1479 | { | 1501 | { |
| 1480 | switch (failed) { | 1502 | switch (failed) { |
| 1481 | case -EFAULT: | 1503 | case -EFAULT: |
| @@ -1517,24 +1539,19 @@ int ftrace_text_reserved(void *start, void *end) | |||
| 1517 | return 0; | 1539 | return 0; |
| 1518 | } | 1540 | } |
| 1519 | 1541 | ||
| 1520 | 1542 | static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) | |
| 1521 | static int | ||
| 1522 | __ftrace_replace_code(struct dyn_ftrace *rec, int enable) | ||
| 1523 | { | 1543 | { |
| 1524 | unsigned long ftrace_addr; | ||
| 1525 | unsigned long flag = 0UL; | 1544 | unsigned long flag = 0UL; |
| 1526 | 1545 | ||
| 1527 | ftrace_addr = (unsigned long)FTRACE_ADDR; | ||
| 1528 | |||
| 1529 | /* | 1546 | /* |
| 1530 | * If we are enabling tracing: | 1547 | * If we are updating calls: |
| 1531 | * | 1548 | * |
| 1532 | * If the record has a ref count, then we need to enable it | 1549 | * If the record has a ref count, then we need to enable it |
| 1533 | * because someone is using it. | 1550 | * because someone is using it. |
| 1534 | * | 1551 | * |
| 1535 | * Otherwise we make sure it's disabled. | 1552 | * Otherwise we make sure it's disabled. |
| 1536 | * | 1553 | * |
| 1537 | * If we are disabling tracing, then disable all records that | 1554 | * If we are disabling calls, then disable all records that |
| 1538 | * are enabled. | 1555 | * are enabled. |
| 1539 | */ | 1556 | */ |
| 1540 | if (enable && (rec->flags & ~FTRACE_FL_MASK)) | 1557 | if (enable && (rec->flags & ~FTRACE_FL_MASK)) |
| @@ -1542,18 +1559,72 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) | |||
| 1542 | 1559 | ||
| 1543 | /* If the state of this record hasn't changed, then do nothing */ | 1560 | /* If the state of this record hasn't changed, then do nothing */ |
| 1544 | if ((rec->flags & FTRACE_FL_ENABLED) == flag) | 1561 | if ((rec->flags & FTRACE_FL_ENABLED) == flag) |
| 1545 | return 0; | 1562 | return FTRACE_UPDATE_IGNORE; |
| 1546 | 1563 | ||
| 1547 | if (flag) { | 1564 | if (flag) { |
| 1548 | rec->flags |= FTRACE_FL_ENABLED; | 1565 | if (update) |
| 1566 | rec->flags |= FTRACE_FL_ENABLED; | ||
| 1567 | return FTRACE_UPDATE_MAKE_CALL; | ||
| 1568 | } | ||
| 1569 | |||
| 1570 | if (update) | ||
| 1571 | rec->flags &= ~FTRACE_FL_ENABLED; | ||
| 1572 | |||
| 1573 | return FTRACE_UPDATE_MAKE_NOP; | ||
| 1574 | } | ||
| 1575 | |||
| 1576 | /** | ||
| 1577 | * ftrace_update_record, set a record that now is tracing or not | ||
| 1578 | * @rec: the record to update | ||
| 1579 | * @enable: set to 1 if the record is tracing, zero to force disable | ||
| 1580 | * | ||
| 1581 | * The records that represent all functions that can be traced need | ||
| 1582 | * to be updated when tracing has been enabled. | ||
| 1583 | */ | ||
| 1584 | int ftrace_update_record(struct dyn_ftrace *rec, int enable) | ||
| 1585 | { | ||
| 1586 | return ftrace_check_record(rec, enable, 1); | ||
| 1587 | } | ||
| 1588 | |||
| 1589 | /** | ||
| 1590 | * ftrace_test_record, check if the record has been enabled or not | ||
| 1591 | * @rec: the record to test | ||
| 1592 | * @enable: set to 1 to check if enabled, 0 if it is disabled | ||
| 1593 | * | ||
| 1594 | * The arch code may need to test if a record is already set to | ||
| 1595 | * tracing to determine how to modify the function code that it | ||
| 1596 | * represents. | ||
| 1597 | */ | ||
| 1598 | int ftrace_test_record(struct dyn_ftrace *rec, int enable) | ||
| 1599 | { | ||
| 1600 | return ftrace_check_record(rec, enable, 0); | ||
| 1601 | } | ||
| 1602 | |||
| 1603 | static int | ||
| 1604 | __ftrace_replace_code(struct dyn_ftrace *rec, int enable) | ||
| 1605 | { | ||
| 1606 | unsigned long ftrace_addr; | ||
| 1607 | int ret; | ||
| 1608 | |||
| 1609 | ftrace_addr = (unsigned long)FTRACE_ADDR; | ||
| 1610 | |||
| 1611 | ret = ftrace_update_record(rec, enable); | ||
| 1612 | |||
| 1613 | switch (ret) { | ||
| 1614 | case FTRACE_UPDATE_IGNORE: | ||
| 1615 | return 0; | ||
| 1616 | |||
| 1617 | case FTRACE_UPDATE_MAKE_CALL: | ||
| 1549 | return ftrace_make_call(rec, ftrace_addr); | 1618 | return ftrace_make_call(rec, ftrace_addr); |
| 1619 | |||
| 1620 | case FTRACE_UPDATE_MAKE_NOP: | ||
| 1621 | return ftrace_make_nop(NULL, rec, ftrace_addr); | ||
| 1550 | } | 1622 | } |
| 1551 | 1623 | ||
| 1552 | rec->flags &= ~FTRACE_FL_ENABLED; | 1624 | return -1; /* unknown ftrace bug */ |
| 1553 | return ftrace_make_nop(NULL, rec, ftrace_addr); | ||
| 1554 | } | 1625 | } |
| 1555 | 1626 | ||
| 1556 | static void ftrace_replace_code(int enable) | 1627 | static void ftrace_replace_code(int update) |
| 1557 | { | 1628 | { |
| 1558 | struct dyn_ftrace *rec; | 1629 | struct dyn_ftrace *rec; |
| 1559 | struct ftrace_page *pg; | 1630 | struct ftrace_page *pg; |
| @@ -1563,11 +1634,7 @@ static void ftrace_replace_code(int enable) | |||
| 1563 | return; | 1634 | return; |
| 1564 | 1635 | ||
| 1565 | do_for_each_ftrace_rec(pg, rec) { | 1636 | do_for_each_ftrace_rec(pg, rec) { |
| 1566 | /* Skip over free records */ | 1637 | failed = __ftrace_replace_code(rec, update); |
| 1567 | if (rec->flags & FTRACE_FL_FREE) | ||
| 1568 | continue; | ||
| 1569 | |||
| 1570 | failed = __ftrace_replace_code(rec, enable); | ||
| 1571 | if (failed) { | 1638 | if (failed) { |
| 1572 | ftrace_bug(failed, rec->ip); | 1639 | ftrace_bug(failed, rec->ip); |
| 1573 | /* Stop processing */ | 1640 | /* Stop processing */ |
| @@ -1576,6 +1643,78 @@ static void ftrace_replace_code(int enable) | |||
| 1576 | } while_for_each_ftrace_rec(); | 1643 | } while_for_each_ftrace_rec(); |
| 1577 | } | 1644 | } |
| 1578 | 1645 | ||
| 1646 | struct ftrace_rec_iter { | ||
| 1647 | struct ftrace_page *pg; | ||
| 1648 | int index; | ||
| 1649 | }; | ||
| 1650 | |||
| 1651 | /** | ||
| 1652 | * ftrace_rec_iter_start, start up iterating over traced functions | ||
| 1653 | * | ||
| 1654 | * Returns an iterator handle that is used to iterate over all | ||
| 1655 | * the records that represent address locations where functions | ||
| 1656 | * are traced. | ||
| 1657 | * | ||
| 1658 | * May return NULL if no records are available. | ||
| 1659 | */ | ||
| 1660 | struct ftrace_rec_iter *ftrace_rec_iter_start(void) | ||
| 1661 | { | ||
| 1662 | /* | ||
| 1663 | * We only use a single iterator. | ||
| 1664 | * Protected by the ftrace_lock mutex. | ||
| 1665 | */ | ||
| 1666 | static struct ftrace_rec_iter ftrace_rec_iter; | ||
| 1667 | struct ftrace_rec_iter *iter = &ftrace_rec_iter; | ||
| 1668 | |||
| 1669 | iter->pg = ftrace_pages_start; | ||
| 1670 | iter->index = 0; | ||
| 1671 | |||
| 1672 | /* Could have empty pages */ | ||
| 1673 | while (iter->pg && !iter->pg->index) | ||
| 1674 | iter->pg = iter->pg->next; | ||
| 1675 | |||
| 1676 | if (!iter->pg) | ||
| 1677 | return NULL; | ||
| 1678 | |||
| 1679 | return iter; | ||
| 1680 | } | ||
| 1681 | |||
| 1682 | /** | ||
| 1683 | * ftrace_rec_iter_next, get the next record to process. | ||
| 1684 | * @iter: The handle to the iterator. | ||
| 1685 | * | ||
| 1686 | * Returns the next iterator after the given iterator @iter. | ||
| 1687 | */ | ||
| 1688 | struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter) | ||
| 1689 | { | ||
| 1690 | iter->index++; | ||
| 1691 | |||
| 1692 | if (iter->index >= iter->pg->index) { | ||
| 1693 | iter->pg = iter->pg->next; | ||
| 1694 | iter->index = 0; | ||
| 1695 | |||
| 1696 | /* Could have empty pages */ | ||
| 1697 | while (iter->pg && !iter->pg->index) | ||
| 1698 | iter->pg = iter->pg->next; | ||
| 1699 | } | ||
| 1700 | |||
| 1701 | if (!iter->pg) | ||
| 1702 | return NULL; | ||
| 1703 | |||
| 1704 | return iter; | ||
| 1705 | } | ||
| 1706 | |||
| 1707 | /** | ||
| 1708 | * ftrace_rec_iter_record, get the record at the iterator location | ||
| 1709 | * @iter: The current iterator location | ||
| 1710 | * | ||
| 1711 | * Returns the record that the current @iter is at. | ||
| 1712 | */ | ||
| 1713 | struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter) | ||
| 1714 | { | ||
| 1715 | return &iter->pg->records[iter->index]; | ||
| 1716 | } | ||
| 1717 | |||
| 1579 | static int | 1718 | static int |
| 1580 | ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) | 1719 | ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) |
| 1581 | { | 1720 | { |
| @@ -1617,13 +1756,7 @@ static int __ftrace_modify_code(void *data) | |||
| 1617 | { | 1756 | { |
| 1618 | int *command = data; | 1757 | int *command = data; |
| 1619 | 1758 | ||
| 1620 | /* | 1759 | if (*command & FTRACE_UPDATE_CALLS) |
| 1621 | * Do not call function tracer while we update the code. | ||
| 1622 | * We are in stop machine, no worrying about races. | ||
| 1623 | */ | ||
| 1624 | function_trace_stop++; | ||
| 1625 | |||
| 1626 | if (*command & FTRACE_ENABLE_CALLS) | ||
| 1627 | ftrace_replace_code(1); | 1760 | ftrace_replace_code(1); |
| 1628 | else if (*command & FTRACE_DISABLE_CALLS) | 1761 | else if (*command & FTRACE_DISABLE_CALLS) |
| 1629 | ftrace_replace_code(0); | 1762 | ftrace_replace_code(0); |
| @@ -1636,21 +1769,33 @@ static int __ftrace_modify_code(void *data) | |||
| 1636 | else if (*command & FTRACE_STOP_FUNC_RET) | 1769 | else if (*command & FTRACE_STOP_FUNC_RET) |
| 1637 | ftrace_disable_ftrace_graph_caller(); | 1770 | ftrace_disable_ftrace_graph_caller(); |
| 1638 | 1771 | ||
| 1639 | #ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST | ||
| 1640 | /* | ||
| 1641 | * For archs that call ftrace_test_stop_func(), we must | ||
| 1642 | * wait till after we update all the function callers | ||
| 1643 | * before we update the callback. This keeps different | ||
| 1644 | * ops that record different functions from corrupting | ||
| 1645 | * each other. | ||
| 1646 | */ | ||
| 1647 | __ftrace_trace_function = __ftrace_trace_function_delay; | ||
| 1648 | #endif | ||
| 1649 | function_trace_stop--; | ||
| 1650 | |||
| 1651 | return 0; | 1772 | return 0; |
| 1652 | } | 1773 | } |
| 1653 | 1774 | ||
| 1775 | /** | ||
| 1776 | * ftrace_run_stop_machine, go back to the stop machine method | ||
| 1777 | * @command: The command to tell ftrace what to do | ||
| 1778 | * | ||
| 1779 | * If an arch needs to fall back to the stop machine method, then | ||
| 1780 | * it can call this function. | ||
| 1781 | */ | ||
| 1782 | void ftrace_run_stop_machine(int command) | ||
| 1783 | { | ||
| 1784 | stop_machine(__ftrace_modify_code, &command, NULL); | ||
| 1785 | } | ||
| 1786 | |||
| 1787 | /** | ||
| 1788 | * arch_ftrace_update_code, modify the code to trace or not trace | ||
| 1789 | * @command: The command that needs to be done | ||
| 1790 | * | ||
| 1791 | * Archs can override this function if it does not need to | ||
| 1792 | * run stop_machine() to modify code. | ||
| 1793 | */ | ||
| 1794 | void __weak arch_ftrace_update_code(int command) | ||
| 1795 | { | ||
| 1796 | ftrace_run_stop_machine(command); | ||
| 1797 | } | ||
| 1798 | |||
| 1654 | static void ftrace_run_update_code(int command) | 1799 | static void ftrace_run_update_code(int command) |
| 1655 | { | 1800 | { |
| 1656 | int ret; | 1801 | int ret; |
| @@ -1659,8 +1804,31 @@ static void ftrace_run_update_code(int command) | |||
| 1659 | FTRACE_WARN_ON(ret); | 1804 | FTRACE_WARN_ON(ret); |
| 1660 | if (ret) | 1805 | if (ret) |
| 1661 | return; | 1806 | return; |
| 1807 | /* | ||
| 1808 | * Do not call function tracer while we update the code. | ||
| 1809 | * We are in stop machine. | ||
| 1810 | */ | ||
| 1811 | function_trace_stop++; | ||
| 1662 | 1812 | ||
| 1663 | stop_machine(__ftrace_modify_code, &command, NULL); | 1813 | /* |
| 1814 | * By default we use stop_machine() to modify the code. | ||
| 1815 | * But archs can do whatever they want as long as it | ||
| 1816 | * is safe. The stop_machine() is the safest, but also | ||
| 1817 | * produces the most overhead. | ||
| 1818 | */ | ||
| 1819 | arch_ftrace_update_code(command); | ||
| 1820 | |||
| 1821 | #ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST | ||
| 1822 | /* | ||
| 1823 | * For archs that call ftrace_test_stop_func(), we must | ||
| 1824 | * wait till after we update all the function callers | ||
| 1825 | * before we update the callback. This keeps different | ||
| 1826 | * ops that record different functions from corrupting | ||
| 1827 | * each other. | ||
| 1828 | */ | ||
| 1829 | __ftrace_trace_function = __ftrace_trace_function_delay; | ||
| 1830 | #endif | ||
| 1831 | function_trace_stop--; | ||
| 1664 | 1832 | ||
| 1665 | ret = ftrace_arch_code_modify_post_process(); | 1833 | ret = ftrace_arch_code_modify_post_process(); |
| 1666 | FTRACE_WARN_ON(ret); | 1834 | FTRACE_WARN_ON(ret); |
| @@ -1691,7 +1859,7 @@ static int ftrace_startup(struct ftrace_ops *ops, int command) | |||
| 1691 | return -ENODEV; | 1859 | return -ENODEV; |
| 1692 | 1860 | ||
| 1693 | ftrace_start_up++; | 1861 | ftrace_start_up++; |
| 1694 | command |= FTRACE_ENABLE_CALLS; | 1862 | command |= FTRACE_UPDATE_CALLS; |
| 1695 | 1863 | ||
| 1696 | /* ops marked global share the filter hashes */ | 1864 | /* ops marked global share the filter hashes */ |
| 1697 | if (ops->flags & FTRACE_OPS_FL_GLOBAL) { | 1865 | if (ops->flags & FTRACE_OPS_FL_GLOBAL) { |
| @@ -1743,8 +1911,7 @@ static void ftrace_shutdown(struct ftrace_ops *ops, int command) | |||
| 1743 | if (ops != &global_ops || !global_start_up) | 1911 | if (ops != &global_ops || !global_start_up) |
| 1744 | ops->flags &= ~FTRACE_OPS_FL_ENABLED; | 1912 | ops->flags &= ~FTRACE_OPS_FL_ENABLED; |
| 1745 | 1913 | ||
| 1746 | if (!ftrace_start_up) | 1914 | command |= FTRACE_UPDATE_CALLS; |
| 1747 | command |= FTRACE_DISABLE_CALLS; | ||
| 1748 | 1915 | ||
| 1749 | if (saved_ftrace_func != ftrace_trace_function) { | 1916 | if (saved_ftrace_func != ftrace_trace_function) { |
| 1750 | saved_ftrace_func = ftrace_trace_function; | 1917 | saved_ftrace_func = ftrace_trace_function; |
| @@ -1766,7 +1933,7 @@ static void ftrace_startup_sysctl(void) | |||
| 1766 | saved_ftrace_func = NULL; | 1933 | saved_ftrace_func = NULL; |
| 1767 | /* ftrace_start_up is true if we want ftrace running */ | 1934 | /* ftrace_start_up is true if we want ftrace running */ |
| 1768 | if (ftrace_start_up) | 1935 | if (ftrace_start_up) |
| 1769 | ftrace_run_update_code(FTRACE_ENABLE_CALLS); | 1936 | ftrace_run_update_code(FTRACE_UPDATE_CALLS); |
| 1770 | } | 1937 | } |
| 1771 | 1938 | ||
| 1772 | static void ftrace_shutdown_sysctl(void) | 1939 | static void ftrace_shutdown_sysctl(void) |
| @@ -1788,14 +1955,16 @@ static int ops_traces_mod(struct ftrace_ops *ops) | |||
| 1788 | struct ftrace_hash *hash; | 1955 | struct ftrace_hash *hash; |
| 1789 | 1956 | ||
| 1790 | hash = ops->filter_hash; | 1957 | hash = ops->filter_hash; |
| 1791 | return !!(!hash || !hash->count); | 1958 | return ftrace_hash_empty(hash); |
| 1792 | } | 1959 | } |
| 1793 | 1960 | ||
| 1794 | static int ftrace_update_code(struct module *mod) | 1961 | static int ftrace_update_code(struct module *mod) |
| 1795 | { | 1962 | { |
| 1963 | struct ftrace_page *pg; | ||
| 1796 | struct dyn_ftrace *p; | 1964 | struct dyn_ftrace *p; |
| 1797 | cycle_t start, stop; | 1965 | cycle_t start, stop; |
| 1798 | unsigned long ref = 0; | 1966 | unsigned long ref = 0; |
| 1967 | int i; | ||
| 1799 | 1968 | ||
| 1800 | /* | 1969 | /* |
| 1801 | * When adding a module, we need to check if tracers are | 1970 | * When adding a module, we need to check if tracers are |
| @@ -1817,46 +1986,44 @@ static int ftrace_update_code(struct module *mod) | |||
| 1817 | start = ftrace_now(raw_smp_processor_id()); | 1986 | start = ftrace_now(raw_smp_processor_id()); |
| 1818 | ftrace_update_cnt = 0; | 1987 | ftrace_update_cnt = 0; |
| 1819 | 1988 | ||
| 1820 | while (ftrace_new_addrs) { | 1989 | for (pg = ftrace_new_pgs; pg; pg = pg->next) { |
| 1821 | 1990 | ||
| 1822 | /* If something went wrong, bail without enabling anything */ | 1991 | for (i = 0; i < pg->index; i++) { |
| 1823 | if (unlikely(ftrace_disabled)) | 1992 | /* If something went wrong, bail without enabling anything */ |
| 1824 | return -1; | 1993 | if (unlikely(ftrace_disabled)) |
| 1994 | return -1; | ||
| 1825 | 1995 | ||
| 1826 | p = ftrace_new_addrs; | 1996 | p = &pg->records[i]; |
| 1827 | ftrace_new_addrs = p->newlist; | 1997 | p->flags = ref; |
| 1828 | p->flags = ref; | ||
| 1829 | 1998 | ||
| 1830 | /* | 1999 | /* |
| 1831 | * Do the initial record conversion from mcount jump | 2000 | * Do the initial record conversion from mcount jump |
| 1832 | * to the NOP instructions. | 2001 | * to the NOP instructions. |
| 1833 | */ | 2002 | */ |
| 1834 | if (!ftrace_code_disable(mod, p)) { | 2003 | if (!ftrace_code_disable(mod, p)) |
| 1835 | ftrace_free_rec(p); | 2004 | break; |
| 1836 | /* Game over */ | ||
| 1837 | break; | ||
| 1838 | } | ||
| 1839 | 2005 | ||
| 1840 | ftrace_update_cnt++; | 2006 | ftrace_update_cnt++; |
| 1841 | 2007 | ||
| 1842 | /* | 2008 | /* |
| 1843 | * If the tracing is enabled, go ahead and enable the record. | 2009 | * If the tracing is enabled, go ahead and enable the record. |
| 1844 | * | 2010 | * |
| 1845 | * The reason not to enable the record immediately is the | 2011 | * The reason not to enable the record immediately is the |
| 1846 | * inherent check of ftrace_make_nop/ftrace_make_call for | 2012 | * inherent check of ftrace_make_nop/ftrace_make_call for |
| 1847 | * correct previous instructions. Making first the NOP | 2013 | * correct previous instructions. Making first the NOP |
| 1848 | * conversion puts the module to the correct state, thus | 2014 | * conversion puts the module to the correct state, thus |
| 1849 | * passing the ftrace_make_call check. | 2015 | * passing the ftrace_make_call check. |
| 1850 | */ | 2016 | */ |
| 1851 | if (ftrace_start_up && ref) { | 2017 | if (ftrace_start_up && ref) { |
| 1852 | int failed = __ftrace_replace_code(p, 1); | 2018 | int failed = __ftrace_replace_code(p, 1); |
| 1853 | if (failed) { | 2019 | if (failed) |
| 1854 | ftrace_bug(failed, p->ip); | 2020 | ftrace_bug(failed, p->ip); |
| 1855 | ftrace_free_rec(p); | ||
| 1856 | } | 2021 | } |
| 1857 | } | 2022 | } |
| 1858 | } | 2023 | } |
| 1859 | 2024 | ||
| 2025 | ftrace_new_pgs = NULL; | ||
| 2026 | |||
| 1860 | stop = ftrace_now(raw_smp_processor_id()); | 2027 | stop = ftrace_now(raw_smp_processor_id()); |
| 1861 | ftrace_update_time = stop - start; | 2028 | ftrace_update_time = stop - start; |
| 1862 | ftrace_update_tot_cnt += ftrace_update_cnt; | 2029 | ftrace_update_tot_cnt += ftrace_update_cnt; |
| @@ -1864,57 +2031,108 @@ static int ftrace_update_code(struct module *mod) | |||
| 1864 | return 0; | 2031 | return 0; |
| 1865 | } | 2032 | } |
| 1866 | 2033 | ||
| 1867 | static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) | 2034 | static int ftrace_allocate_records(struct ftrace_page *pg, int count) |
| 1868 | { | 2035 | { |
| 1869 | struct ftrace_page *pg; | 2036 | int order; |
| 1870 | int cnt; | 2037 | int cnt; |
| 1871 | int i; | ||
| 1872 | 2038 | ||
| 1873 | /* allocate a few pages */ | 2039 | if (WARN_ON(!count)) |
| 1874 | ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL); | 2040 | return -EINVAL; |
| 1875 | if (!ftrace_pages_start) | 2041 | |
| 1876 | return -1; | 2042 | order = get_count_order(DIV_ROUND_UP(count, ENTRIES_PER_PAGE)); |
| 1877 | 2043 | ||
| 1878 | /* | 2044 | /* |
| 1879 | * Allocate a few more pages. | 2045 | * We want to fill as much as possible. No more than a page |
| 1880 | * | 2046 | * may be empty. |
| 1881 | * TODO: have some parser search vmlinux before | ||
| 1882 | * final linking to find all calls to ftrace. | ||
| 1883 | * Then we can: | ||
| 1884 | * a) know how many pages to allocate. | ||
| 1885 | * and/or | ||
| 1886 | * b) set up the table then. | ||
| 1887 | * | ||
| 1888 | * The dynamic code is still necessary for | ||
| 1889 | * modules. | ||
| 1890 | */ | 2047 | */ |
| 2048 | while ((PAGE_SIZE << order) / ENTRY_SIZE >= count + ENTRIES_PER_PAGE) | ||
| 2049 | order--; | ||
| 1891 | 2050 | ||
| 1892 | pg = ftrace_pages = ftrace_pages_start; | 2051 | again: |
| 2052 | pg->records = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order); | ||
| 1893 | 2053 | ||
| 1894 | cnt = num_to_init / ENTRIES_PER_PAGE; | 2054 | if (!pg->records) { |
| 1895 | pr_info("ftrace: allocating %ld entries in %d pages\n", | 2055 | /* if we can't allocate this size, try something smaller */ |
| 1896 | num_to_init, cnt + 1); | 2056 | if (!order) |
| 2057 | return -ENOMEM; | ||
| 2058 | order >>= 1; | ||
| 2059 | goto again; | ||
| 2060 | } | ||
| 1897 | 2061 | ||
| 1898 | for (i = 0; i < cnt; i++) { | 2062 | cnt = (PAGE_SIZE << order) / ENTRY_SIZE; |
| 1899 | pg->next = (void *)get_zeroed_page(GFP_KERNEL); | 2063 | pg->size = cnt; |
| 1900 | 2064 | ||
| 1901 | /* If we fail, we'll try later anyway */ | 2065 | if (cnt > count) |
| 1902 | if (!pg->next) | 2066 | cnt = count; |
| 2067 | |||
| 2068 | return cnt; | ||
| 2069 | } | ||
| 2070 | |||
| 2071 | static struct ftrace_page * | ||
| 2072 | ftrace_allocate_pages(unsigned long num_to_init) | ||
| 2073 | { | ||
| 2074 | struct ftrace_page *start_pg; | ||
| 2075 | struct ftrace_page *pg; | ||
| 2076 | int order; | ||
| 2077 | int cnt; | ||
| 2078 | |||
| 2079 | if (!num_to_init) | ||
| 2080 | return 0; | ||
| 2081 | |||
| 2082 | start_pg = pg = kzalloc(sizeof(*pg), GFP_KERNEL); | ||
| 2083 | if (!pg) | ||
| 2084 | return NULL; | ||
| 2085 | |||
| 2086 | /* | ||
| 2087 | * Try to allocate as much as possible in one contiguous | ||
| 2088 | * location that fills in all of the space. We want to | ||
| 2089 | * waste as little space as possible. | ||
| 2090 | */ | ||
| 2091 | for (;;) { | ||
| 2092 | cnt = ftrace_allocate_records(pg, num_to_init); | ||
| 2093 | if (cnt < 0) | ||
| 2094 | goto free_pages; | ||
| 2095 | |||
| 2096 | num_to_init -= cnt; | ||
| 2097 | if (!num_to_init) | ||
| 1903 | break; | 2098 | break; |
| 1904 | 2099 | ||
| 2100 | pg->next = kzalloc(sizeof(*pg), GFP_KERNEL); | ||
| 2101 | if (!pg->next) | ||
| 2102 | goto free_pages; | ||
| 2103 | |||
| 1905 | pg = pg->next; | 2104 | pg = pg->next; |
| 1906 | } | 2105 | } |
| 1907 | 2106 | ||
| 1908 | return 0; | 2107 | return start_pg; |
| 2108 | |||
| 2109 | free_pages: | ||
| 2110 | while (start_pg) { | ||
| 2111 | order = get_count_order(pg->size / ENTRIES_PER_PAGE); | ||
| 2112 | free_pages((unsigned long)pg->records, order); | ||
| 2113 | start_pg = pg->next; | ||
| 2114 | kfree(pg); | ||
| 2115 | pg = start_pg; | ||
| 2116 | } | ||
| 2117 | pr_info("ftrace: FAILED to allocate memory for functions\n"); | ||
| 2118 | return NULL; | ||
| 1909 | } | 2119 | } |
| 1910 | 2120 | ||
| 1911 | enum { | 2121 | static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) |
| 1912 | FTRACE_ITER_FILTER = (1 << 0), | 2122 | { |
| 1913 | FTRACE_ITER_NOTRACE = (1 << 1), | 2123 | int cnt; |
| 1914 | FTRACE_ITER_PRINTALL = (1 << 2), | 2124 | |
| 1915 | FTRACE_ITER_HASH = (1 << 3), | 2125 | if (!num_to_init) { |
| 1916 | FTRACE_ITER_ENABLED = (1 << 4), | 2126 | pr_info("ftrace: No functions to be traced?\n"); |
| 1917 | }; | 2127 | return -1; |
| 2128 | } | ||
| 2129 | |||
| 2130 | cnt = num_to_init / ENTRIES_PER_PAGE; | ||
| 2131 | pr_info("ftrace: allocating %ld entries in %d pages\n", | ||
| 2132 | num_to_init, cnt + 1); | ||
| 2133 | |||
| 2134 | return 0; | ||
| 2135 | } | ||
| 1918 | 2136 | ||
| 1919 | #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ | 2137 | #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ |
| 1920 | 2138 | ||
| @@ -1980,6 +2198,9 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos) | |||
| 1980 | void *p = NULL; | 2198 | void *p = NULL; |
| 1981 | loff_t l; | 2199 | loff_t l; |
| 1982 | 2200 | ||
| 2201 | if (!(iter->flags & FTRACE_ITER_DO_HASH)) | ||
| 2202 | return NULL; | ||
| 2203 | |||
| 1983 | if (iter->func_pos > *pos) | 2204 | if (iter->func_pos > *pos) |
| 1984 | return NULL; | 2205 | return NULL; |
| 1985 | 2206 | ||
| @@ -2023,7 +2244,7 @@ static void * | |||
| 2023 | t_next(struct seq_file *m, void *v, loff_t *pos) | 2244 | t_next(struct seq_file *m, void *v, loff_t *pos) |
| 2024 | { | 2245 | { |
| 2025 | struct ftrace_iterator *iter = m->private; | 2246 | struct ftrace_iterator *iter = m->private; |
| 2026 | struct ftrace_ops *ops = &global_ops; | 2247 | struct ftrace_ops *ops = iter->ops; |
| 2027 | struct dyn_ftrace *rec = NULL; | 2248 | struct dyn_ftrace *rec = NULL; |
| 2028 | 2249 | ||
| 2029 | if (unlikely(ftrace_disabled)) | 2250 | if (unlikely(ftrace_disabled)) |
| @@ -2047,9 +2268,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos) | |||
| 2047 | } | 2268 | } |
| 2048 | } else { | 2269 | } else { |
| 2049 | rec = &iter->pg->records[iter->idx++]; | 2270 | rec = &iter->pg->records[iter->idx++]; |
| 2050 | if ((rec->flags & FTRACE_FL_FREE) || | 2271 | if (((iter->flags & FTRACE_ITER_FILTER) && |
| 2051 | |||
| 2052 | ((iter->flags & FTRACE_ITER_FILTER) && | ||
| 2053 | !(ftrace_lookup_ip(ops->filter_hash, rec->ip))) || | 2272 | !(ftrace_lookup_ip(ops->filter_hash, rec->ip))) || |
| 2054 | 2273 | ||
| 2055 | ((iter->flags & FTRACE_ITER_NOTRACE) && | 2274 | ((iter->flags & FTRACE_ITER_NOTRACE) && |
| @@ -2081,7 +2300,7 @@ static void reset_iter_read(struct ftrace_iterator *iter) | |||
| 2081 | static void *t_start(struct seq_file *m, loff_t *pos) | 2300 | static void *t_start(struct seq_file *m, loff_t *pos) |
| 2082 | { | 2301 | { |
| 2083 | struct ftrace_iterator *iter = m->private; | 2302 | struct ftrace_iterator *iter = m->private; |
| 2084 | struct ftrace_ops *ops = &global_ops; | 2303 | struct ftrace_ops *ops = iter->ops; |
| 2085 | void *p = NULL; | 2304 | void *p = NULL; |
| 2086 | loff_t l; | 2305 | loff_t l; |
| 2087 | 2306 | ||
| @@ -2101,7 +2320,8 @@ static void *t_start(struct seq_file *m, loff_t *pos) | |||
| 2101 | * off, we can short cut and just print out that all | 2320 | * off, we can short cut and just print out that all |
| 2102 | * functions are enabled. | 2321 | * functions are enabled. |
| 2103 | */ | 2322 | */ |
| 2104 | if (iter->flags & FTRACE_ITER_FILTER && !ops->filter_hash->count) { | 2323 | if (iter->flags & FTRACE_ITER_FILTER && |
| 2324 | ftrace_hash_empty(ops->filter_hash)) { | ||
| 2105 | if (*pos > 0) | 2325 | if (*pos > 0) |
| 2106 | return t_hash_start(m, pos); | 2326 | return t_hash_start(m, pos); |
| 2107 | iter->flags |= FTRACE_ITER_PRINTALL; | 2327 | iter->flags |= FTRACE_ITER_PRINTALL; |
| @@ -2126,12 +2346,8 @@ static void *t_start(struct seq_file *m, loff_t *pos) | |||
| 2126 | break; | 2346 | break; |
| 2127 | } | 2347 | } |
| 2128 | 2348 | ||
| 2129 | if (!p) { | 2349 | if (!p) |
| 2130 | if (iter->flags & FTRACE_ITER_FILTER) | 2350 | return t_hash_start(m, pos); |
| 2131 | return t_hash_start(m, pos); | ||
| 2132 | |||
| 2133 | return NULL; | ||
| 2134 | } | ||
| 2135 | 2351 | ||
| 2136 | return iter; | 2352 | return iter; |
| 2137 | } | 2353 | } |
| @@ -2189,6 +2405,7 @@ ftrace_avail_open(struct inode *inode, struct file *file) | |||
| 2189 | return -ENOMEM; | 2405 | return -ENOMEM; |
| 2190 | 2406 | ||
| 2191 | iter->pg = ftrace_pages_start; | 2407 | iter->pg = ftrace_pages_start; |
| 2408 | iter->ops = &global_ops; | ||
| 2192 | 2409 | ||
| 2193 | ret = seq_open(file, &show_ftrace_seq_ops); | 2410 | ret = seq_open(file, &show_ftrace_seq_ops); |
| 2194 | if (!ret) { | 2411 | if (!ret) { |
| @@ -2217,6 +2434,7 @@ ftrace_enabled_open(struct inode *inode, struct file *file) | |||
| 2217 | 2434 | ||
| 2218 | iter->pg = ftrace_pages_start; | 2435 | iter->pg = ftrace_pages_start; |
| 2219 | iter->flags = FTRACE_ITER_ENABLED; | 2436 | iter->flags = FTRACE_ITER_ENABLED; |
| 2437 | iter->ops = &global_ops; | ||
| 2220 | 2438 | ||
| 2221 | ret = seq_open(file, &show_ftrace_seq_ops); | 2439 | ret = seq_open(file, &show_ftrace_seq_ops); |
| 2222 | if (!ret) { | 2440 | if (!ret) { |
| @@ -2237,7 +2455,23 @@ static void ftrace_filter_reset(struct ftrace_hash *hash) | |||
| 2237 | mutex_unlock(&ftrace_lock); | 2455 | mutex_unlock(&ftrace_lock); |
| 2238 | } | 2456 | } |
| 2239 | 2457 | ||
| 2240 | static int | 2458 | /** |
| 2459 | * ftrace_regex_open - initialize function tracer filter files | ||
| 2460 | * @ops: The ftrace_ops that hold the hash filters | ||
| 2461 | * @flag: The type of filter to process | ||
| 2462 | * @inode: The inode, usually passed in to your open routine | ||
| 2463 | * @file: The file, usually passed in to your open routine | ||
| 2464 | * | ||
| 2465 | * ftrace_regex_open() initializes the filter files for the | ||
| 2466 | * @ops. Depending on @flag it may process the filter hash or | ||
| 2467 | * the notrace hash of @ops. With this called from the open | ||
| 2468 | * routine, you can use ftrace_filter_write() for the write | ||
| 2469 | * routine if @flag has FTRACE_ITER_FILTER set, or | ||
| 2470 | * ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set. | ||
| 2471 | * ftrace_regex_lseek() should be used as the lseek routine, and | ||
| 2472 | * release must call ftrace_regex_release(). | ||
| 2473 | */ | ||
| 2474 | int | ||
| 2241 | ftrace_regex_open(struct ftrace_ops *ops, int flag, | 2475 | ftrace_regex_open(struct ftrace_ops *ops, int flag, |
| 2242 | struct inode *inode, struct file *file) | 2476 | struct inode *inode, struct file *file) |
| 2243 | { | 2477 | { |
| @@ -2306,8 +2540,9 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, | |||
| 2306 | static int | 2540 | static int |
| 2307 | ftrace_filter_open(struct inode *inode, struct file *file) | 2541 | ftrace_filter_open(struct inode *inode, struct file *file) |
| 2308 | { | 2542 | { |
| 2309 | return ftrace_regex_open(&global_ops, FTRACE_ITER_FILTER, | 2543 | return ftrace_regex_open(&global_ops, |
| 2310 | inode, file); | 2544 | FTRACE_ITER_FILTER | FTRACE_ITER_DO_HASH, |
| 2545 | inode, file); | ||
| 2311 | } | 2546 | } |
| 2312 | 2547 | ||
| 2313 | static int | 2548 | static int |
| @@ -2317,7 +2552,7 @@ ftrace_notrace_open(struct inode *inode, struct file *file) | |||
| 2317 | inode, file); | 2552 | inode, file); |
| 2318 | } | 2553 | } |
| 2319 | 2554 | ||
| 2320 | static loff_t | 2555 | loff_t |
| 2321 | ftrace_regex_lseek(struct file *file, loff_t offset, int origin) | 2556 | ftrace_regex_lseek(struct file *file, loff_t offset, int origin) |
| 2322 | { | 2557 | { |
| 2323 | loff_t ret; | 2558 | loff_t ret; |
| @@ -2426,7 +2661,6 @@ match_records(struct ftrace_hash *hash, char *buff, | |||
| 2426 | goto out_unlock; | 2661 | goto out_unlock; |
| 2427 | 2662 | ||
| 2428 | do_for_each_ftrace_rec(pg, rec) { | 2663 | do_for_each_ftrace_rec(pg, rec) { |
| 2429 | |||
| 2430 | if (ftrace_match_record(rec, mod, search, search_len, type)) { | 2664 | if (ftrace_match_record(rec, mod, search, search_len, type)) { |
| 2431 | ret = enter_record(hash, rec, not); | 2665 | ret = enter_record(hash, rec, not); |
| 2432 | if (ret < 0) { | 2666 | if (ret < 0) { |
| @@ -2871,14 +3105,14 @@ out_unlock: | |||
| 2871 | return ret; | 3105 | return ret; |
| 2872 | } | 3106 | } |
| 2873 | 3107 | ||
| 2874 | static ssize_t | 3108 | ssize_t |
| 2875 | ftrace_filter_write(struct file *file, const char __user *ubuf, | 3109 | ftrace_filter_write(struct file *file, const char __user *ubuf, |
| 2876 | size_t cnt, loff_t *ppos) | 3110 | size_t cnt, loff_t *ppos) |
| 2877 | { | 3111 | { |
| 2878 | return ftrace_regex_write(file, ubuf, cnt, ppos, 1); | 3112 | return ftrace_regex_write(file, ubuf, cnt, ppos, 1); |
| 2879 | } | 3113 | } |
| 2880 | 3114 | ||
| 2881 | static ssize_t | 3115 | ssize_t |
| 2882 | ftrace_notrace_write(struct file *file, const char __user *ubuf, | 3116 | ftrace_notrace_write(struct file *file, const char __user *ubuf, |
| 2883 | size_t cnt, loff_t *ppos) | 3117 | size_t cnt, loff_t *ppos) |
| 2884 | { | 3118 | { |
| @@ -2919,7 +3153,7 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, | |||
| 2919 | ret = ftrace_hash_move(ops, enable, orig_hash, hash); | 3153 | ret = ftrace_hash_move(ops, enable, orig_hash, hash); |
| 2920 | if (!ret && ops->flags & FTRACE_OPS_FL_ENABLED | 3154 | if (!ret && ops->flags & FTRACE_OPS_FL_ENABLED |
| 2921 | && ftrace_enabled) | 3155 | && ftrace_enabled) |
| 2922 | ftrace_run_update_code(FTRACE_ENABLE_CALLS); | 3156 | ftrace_run_update_code(FTRACE_UPDATE_CALLS); |
| 2923 | 3157 | ||
| 2924 | mutex_unlock(&ftrace_lock); | 3158 | mutex_unlock(&ftrace_lock); |
| 2925 | 3159 | ||
| @@ -3045,8 +3279,8 @@ static void __init set_ftrace_early_graph(char *buf) | |||
| 3045 | } | 3279 | } |
| 3046 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ | 3280 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ |
| 3047 | 3281 | ||
| 3048 | static void __init | 3282 | void __init |
| 3049 | set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable) | 3283 | ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable) |
| 3050 | { | 3284 | { |
| 3051 | char *func; | 3285 | char *func; |
| 3052 | 3286 | ||
| @@ -3059,17 +3293,16 @@ set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable) | |||
| 3059 | static void __init set_ftrace_early_filters(void) | 3293 | static void __init set_ftrace_early_filters(void) |
| 3060 | { | 3294 | { |
| 3061 | if (ftrace_filter_buf[0]) | 3295 | if (ftrace_filter_buf[0]) |
| 3062 | set_ftrace_early_filter(&global_ops, ftrace_filter_buf, 1); | 3296 | ftrace_set_early_filter(&global_ops, ftrace_filter_buf, 1); |
| 3063 | if (ftrace_notrace_buf[0]) | 3297 | if (ftrace_notrace_buf[0]) |
| 3064 | set_ftrace_early_filter(&global_ops, ftrace_notrace_buf, 0); | 3298 | ftrace_set_early_filter(&global_ops, ftrace_notrace_buf, 0); |
| 3065 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 3299 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
| 3066 | if (ftrace_graph_buf[0]) | 3300 | if (ftrace_graph_buf[0]) |
| 3067 | set_ftrace_early_graph(ftrace_graph_buf); | 3301 | set_ftrace_early_graph(ftrace_graph_buf); |
| 3068 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ | 3302 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ |
| 3069 | } | 3303 | } |
| 3070 | 3304 | ||
| 3071 | static int | 3305 | int ftrace_regex_release(struct inode *inode, struct file *file) |
| 3072 | ftrace_regex_release(struct inode *inode, struct file *file) | ||
| 3073 | { | 3306 | { |
| 3074 | struct seq_file *m = (struct seq_file *)file->private_data; | 3307 | struct seq_file *m = (struct seq_file *)file->private_data; |
| 3075 | struct ftrace_iterator *iter; | 3308 | struct ftrace_iterator *iter; |
| @@ -3107,7 +3340,7 @@ ftrace_regex_release(struct inode *inode, struct file *file) | |||
| 3107 | orig_hash, iter->hash); | 3340 | orig_hash, iter->hash); |
| 3108 | if (!ret && (iter->ops->flags & FTRACE_OPS_FL_ENABLED) | 3341 | if (!ret && (iter->ops->flags & FTRACE_OPS_FL_ENABLED) |
| 3109 | && ftrace_enabled) | 3342 | && ftrace_enabled) |
| 3110 | ftrace_run_update_code(FTRACE_ENABLE_CALLS); | 3343 | ftrace_run_update_code(FTRACE_UPDATE_CALLS); |
| 3111 | 3344 | ||
| 3112 | mutex_unlock(&ftrace_lock); | 3345 | mutex_unlock(&ftrace_lock); |
| 3113 | } | 3346 | } |
| @@ -3270,9 +3503,6 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer) | |||
| 3270 | 3503 | ||
| 3271 | do_for_each_ftrace_rec(pg, rec) { | 3504 | do_for_each_ftrace_rec(pg, rec) { |
| 3272 | 3505 | ||
| 3273 | if (rec->flags & FTRACE_FL_FREE) | ||
| 3274 | continue; | ||
| 3275 | |||
| 3276 | if (ftrace_match_record(rec, NULL, search, search_len, type)) { | 3506 | if (ftrace_match_record(rec, NULL, search, search_len, type)) { |
| 3277 | /* if it is in the array */ | 3507 | /* if it is in the array */ |
| 3278 | exists = false; | 3508 | exists = false; |
| @@ -3381,15 +3611,62 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) | |||
| 3381 | return 0; | 3611 | return 0; |
| 3382 | } | 3612 | } |
| 3383 | 3613 | ||
| 3614 | static void ftrace_swap_recs(void *a, void *b, int size) | ||
| 3615 | { | ||
| 3616 | struct dyn_ftrace *reca = a; | ||
| 3617 | struct dyn_ftrace *recb = b; | ||
| 3618 | struct dyn_ftrace t; | ||
| 3619 | |||
| 3620 | t = *reca; | ||
| 3621 | *reca = *recb; | ||
| 3622 | *recb = t; | ||
| 3623 | } | ||
| 3624 | |||
| 3384 | static int ftrace_process_locs(struct module *mod, | 3625 | static int ftrace_process_locs(struct module *mod, |
| 3385 | unsigned long *start, | 3626 | unsigned long *start, |
| 3386 | unsigned long *end) | 3627 | unsigned long *end) |
| 3387 | { | 3628 | { |
| 3629 | struct ftrace_page *pg; | ||
| 3630 | unsigned long count; | ||
| 3388 | unsigned long *p; | 3631 | unsigned long *p; |
| 3389 | unsigned long addr; | 3632 | unsigned long addr; |
| 3390 | unsigned long flags = 0; /* Shut up gcc */ | 3633 | unsigned long flags = 0; /* Shut up gcc */ |
| 3634 | int ret = -ENOMEM; | ||
| 3635 | |||
| 3636 | count = end - start; | ||
| 3637 | |||
| 3638 | if (!count) | ||
| 3639 | return 0; | ||
| 3640 | |||
| 3641 | pg = ftrace_allocate_pages(count); | ||
| 3642 | if (!pg) | ||
| 3643 | return -ENOMEM; | ||
| 3391 | 3644 | ||
| 3392 | mutex_lock(&ftrace_lock); | 3645 | mutex_lock(&ftrace_lock); |
| 3646 | |||
| 3647 | /* | ||
| 3648 | * Core and each module needs their own pages, as | ||
| 3649 | * modules will free them when they are removed. | ||
| 3650 | * Force a new page to be allocated for modules. | ||
| 3651 | */ | ||
| 3652 | if (!mod) { | ||
| 3653 | WARN_ON(ftrace_pages || ftrace_pages_start); | ||
| 3654 | /* First initialization */ | ||
| 3655 | ftrace_pages = ftrace_pages_start = pg; | ||
| 3656 | } else { | ||
| 3657 | if (!ftrace_pages) | ||
| 3658 | goto out; | ||
| 3659 | |||
| 3660 | if (WARN_ON(ftrace_pages->next)) { | ||
| 3661 | /* Hmm, we have free pages? */ | ||
| 3662 | while (ftrace_pages->next) | ||
| 3663 | ftrace_pages = ftrace_pages->next; | ||
| 3664 | } | ||
| 3665 | |||
| 3666 | ftrace_pages->next = pg; | ||
| 3667 | ftrace_pages = pg; | ||
| 3668 | } | ||
| 3669 | |||
| 3393 | p = start; | 3670 | p = start; |
| 3394 | while (p < end) { | 3671 | while (p < end) { |
| 3395 | addr = ftrace_call_adjust(*p++); | 3672 | addr = ftrace_call_adjust(*p++); |
| @@ -3401,9 +3678,18 @@ static int ftrace_process_locs(struct module *mod, | |||
| 3401 | */ | 3678 | */ |
| 3402 | if (!addr) | 3679 | if (!addr) |
| 3403 | continue; | 3680 | continue; |
| 3404 | ftrace_record_ip(addr); | 3681 | if (!ftrace_record_ip(addr)) |
| 3682 | break; | ||
| 3405 | } | 3683 | } |
| 3406 | 3684 | ||
| 3685 | /* These new locations need to be initialized */ | ||
| 3686 | ftrace_new_pgs = pg; | ||
| 3687 | |||
| 3688 | /* Make each individual set of pages sorted by ips */ | ||
| 3689 | for (; pg; pg = pg->next) | ||
| 3690 | sort(pg->records, pg->index, sizeof(struct dyn_ftrace), | ||
| 3691 | ftrace_cmp_recs, ftrace_swap_recs); | ||
| 3692 | |||
| 3407 | /* | 3693 | /* |
| 3408 | * We only need to disable interrupts on start up | 3694 | * We only need to disable interrupts on start up |
| 3409 | * because we are modifying code that an interrupt | 3695 | * because we are modifying code that an interrupt |
| @@ -3417,32 +3703,55 @@ static int ftrace_process_locs(struct module *mod, | |||
| 3417 | ftrace_update_code(mod); | 3703 | ftrace_update_code(mod); |
| 3418 | if (!mod) | 3704 | if (!mod) |
| 3419 | local_irq_restore(flags); | 3705 | local_irq_restore(flags); |
| 3706 | ret = 0; | ||
| 3707 | out: | ||
| 3420 | mutex_unlock(&ftrace_lock); | 3708 | mutex_unlock(&ftrace_lock); |
| 3421 | 3709 | ||
| 3422 | return 0; | 3710 | return ret; |
| 3423 | } | 3711 | } |
| 3424 | 3712 | ||
| 3425 | #ifdef CONFIG_MODULES | 3713 | #ifdef CONFIG_MODULES |
| 3714 | |||
| 3715 | #define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next) | ||
| 3716 | |||
| 3426 | void ftrace_release_mod(struct module *mod) | 3717 | void ftrace_release_mod(struct module *mod) |
| 3427 | { | 3718 | { |
| 3428 | struct dyn_ftrace *rec; | 3719 | struct dyn_ftrace *rec; |
| 3720 | struct ftrace_page **last_pg; | ||
| 3429 | struct ftrace_page *pg; | 3721 | struct ftrace_page *pg; |
| 3722 | int order; | ||
| 3430 | 3723 | ||
| 3431 | mutex_lock(&ftrace_lock); | 3724 | mutex_lock(&ftrace_lock); |
| 3432 | 3725 | ||
| 3433 | if (ftrace_disabled) | 3726 | if (ftrace_disabled) |
| 3434 | goto out_unlock; | 3727 | goto out_unlock; |
| 3435 | 3728 | ||
| 3436 | do_for_each_ftrace_rec(pg, rec) { | 3729 | /* |
| 3730 | * Each module has its own ftrace_pages, remove | ||
| 3731 | * them from the list. | ||
| 3732 | */ | ||
| 3733 | last_pg = &ftrace_pages_start; | ||
| 3734 | for (pg = ftrace_pages_start; pg; pg = *last_pg) { | ||
| 3735 | rec = &pg->records[0]; | ||
| 3437 | if (within_module_core(rec->ip, mod)) { | 3736 | if (within_module_core(rec->ip, mod)) { |
| 3438 | /* | 3737 | /* |
| 3439 | * rec->ip is changed in ftrace_free_rec() | 3738 | * As core pages are first, the first |
| 3440 | * It should not between s and e if record was freed. | 3739 | * page should never be a module page. |
| 3441 | */ | 3740 | */ |
| 3442 | FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE); | 3741 | if (WARN_ON(pg == ftrace_pages_start)) |
| 3443 | ftrace_free_rec(rec); | 3742 | goto out_unlock; |
| 3444 | } | 3743 | |
| 3445 | } while_for_each_ftrace_rec(); | 3744 | /* Check if we are deleting the last page */ |
| 3745 | if (pg == ftrace_pages) | ||
| 3746 | ftrace_pages = next_to_ftrace_page(last_pg); | ||
| 3747 | |||
| 3748 | *last_pg = pg->next; | ||
| 3749 | order = get_count_order(pg->size / ENTRIES_PER_PAGE); | ||
| 3750 | free_pages((unsigned long)pg->records, order); | ||
| 3751 | kfree(pg); | ||
| 3752 | } else | ||
| 3753 | last_pg = &pg->next; | ||
| 3754 | } | ||
| 3446 | out_unlock: | 3755 | out_unlock: |
| 3447 | mutex_unlock(&ftrace_lock); | 3756 | mutex_unlock(&ftrace_lock); |
| 3448 | } | 3757 | } |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 91dc4bc8bf72..a3f1bc5d2a00 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
| @@ -4438,7 +4438,7 @@ static const struct file_operations trace_options_core_fops = { | |||
| 4438 | }; | 4438 | }; |
| 4439 | 4439 | ||
| 4440 | struct dentry *trace_create_file(const char *name, | 4440 | struct dentry *trace_create_file(const char *name, |
| 4441 | mode_t mode, | 4441 | umode_t mode, |
| 4442 | struct dentry *parent, | 4442 | struct dentry *parent, |
| 4443 | void *data, | 4443 | void *data, |
| 4444 | const struct file_operations *fops) | 4444 | const struct file_operations *fops) |
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 2c2657462ac3..b93ecbadad6d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
| @@ -312,7 +312,7 @@ void tracing_reset_current(int cpu); | |||
| 312 | void tracing_reset_current_online_cpus(void); | 312 | void tracing_reset_current_online_cpus(void); |
| 313 | int tracing_open_generic(struct inode *inode, struct file *filp); | 313 | int tracing_open_generic(struct inode *inode, struct file *filp); |
| 314 | struct dentry *trace_create_file(const char *name, | 314 | struct dentry *trace_create_file(const char *name, |
| 315 | mode_t mode, | 315 | umode_t mode, |
| 316 | struct dentry *parent, | 316 | struct dentry *parent, |
| 317 | void *data, | 317 | void *data, |
| 318 | const struct file_operations *fops); | 318 | const struct file_operations *fops); |
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index f04cc3136bd3..24aee7127451 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c | |||
| @@ -1738,11 +1738,121 @@ static int replace_system_preds(struct event_subsystem *system, | |||
| 1738 | return -ENOMEM; | 1738 | return -ENOMEM; |
| 1739 | } | 1739 | } |
| 1740 | 1740 | ||
| 1741 | static int create_filter_start(char *filter_str, bool set_str, | ||
| 1742 | struct filter_parse_state **psp, | ||
| 1743 | struct event_filter **filterp) | ||
| 1744 | { | ||
| 1745 | struct event_filter *filter; | ||
| 1746 | struct filter_parse_state *ps = NULL; | ||
| 1747 | int err = 0; | ||
| 1748 | |||
| 1749 | WARN_ON_ONCE(*psp || *filterp); | ||
| 1750 | |||
| 1751 | /* allocate everything, and if any fails, free all and fail */ | ||
| 1752 | filter = __alloc_filter(); | ||
| 1753 | if (filter && set_str) | ||
| 1754 | err = replace_filter_string(filter, filter_str); | ||
| 1755 | |||
| 1756 | ps = kzalloc(sizeof(*ps), GFP_KERNEL); | ||
| 1757 | |||
| 1758 | if (!filter || !ps || err) { | ||
| 1759 | kfree(ps); | ||
| 1760 | __free_filter(filter); | ||
| 1761 | return -ENOMEM; | ||
| 1762 | } | ||
| 1763 | |||
| 1764 | /* we're committed to creating a new filter */ | ||
| 1765 | *filterp = filter; | ||
| 1766 | *psp = ps; | ||
| 1767 | |||
| 1768 | parse_init(ps, filter_ops, filter_str); | ||
| 1769 | err = filter_parse(ps); | ||
| 1770 | if (err && set_str) | ||
| 1771 | append_filter_err(ps, filter); | ||
| 1772 | return err; | ||
| 1773 | } | ||
| 1774 | |||
| 1775 | static void create_filter_finish(struct filter_parse_state *ps) | ||
| 1776 | { | ||
| 1777 | if (ps) { | ||
| 1778 | filter_opstack_clear(ps); | ||
| 1779 | postfix_clear(ps); | ||
| 1780 | kfree(ps); | ||
| 1781 | } | ||
| 1782 | } | ||
| 1783 | |||
| 1784 | /** | ||
| 1785 | * create_filter - create a filter for a ftrace_event_call | ||
| 1786 | * @call: ftrace_event_call to create a filter for | ||
| 1787 | * @filter_str: filter string | ||
| 1788 | * @set_str: remember @filter_str and enable detailed error in filter | ||
| 1789 | * @filterp: out param for created filter (always updated on return) | ||
| 1790 | * | ||
| 1791 | * Creates a filter for @call with @filter_str. If @set_str is %true, | ||
| 1792 | * @filter_str is copied and recorded in the new filter. | ||
| 1793 | * | ||
| 1794 | * On success, returns 0 and *@filterp points to the new filter. On | ||
| 1795 | * failure, returns -errno and *@filterp may point to %NULL or to a new | ||
| 1796 | * filter. In the latter case, the returned filter contains error | ||
| 1797 | * information if @set_str is %true and the caller is responsible for | ||
| 1798 | * freeing it. | ||
| 1799 | */ | ||
| 1800 | static int create_filter(struct ftrace_event_call *call, | ||
| 1801 | char *filter_str, bool set_str, | ||
| 1802 | struct event_filter **filterp) | ||
| 1803 | { | ||
| 1804 | struct event_filter *filter = NULL; | ||
| 1805 | struct filter_parse_state *ps = NULL; | ||
| 1806 | int err; | ||
| 1807 | |||
| 1808 | err = create_filter_start(filter_str, set_str, &ps, &filter); | ||
| 1809 | if (!err) { | ||
| 1810 | err = replace_preds(call, filter, ps, filter_str, false); | ||
| 1811 | if (err && set_str) | ||
| 1812 | append_filter_err(ps, filter); | ||
| 1813 | } | ||
| 1814 | create_filter_finish(ps); | ||
| 1815 | |||
| 1816 | *filterp = filter; | ||
| 1817 | return err; | ||
| 1818 | } | ||
| 1819 | |||
| 1820 | /** | ||
| 1821 | * create_system_filter - create a filter for an event_subsystem | ||
| 1822 | * @system: event_subsystem to create a filter for | ||
| 1823 | * @filter_str: filter string | ||
| 1824 | * @filterp: out param for created filter (always updated on return) | ||
| 1825 | * | ||
| 1826 | * Identical to create_filter() except that it creates a subsystem filter | ||
| 1827 | * and always remembers @filter_str. | ||
| 1828 | */ | ||
| 1829 | static int create_system_filter(struct event_subsystem *system, | ||
| 1830 | char *filter_str, struct event_filter **filterp) | ||
| 1831 | { | ||
| 1832 | struct event_filter *filter = NULL; | ||
| 1833 | struct filter_parse_state *ps = NULL; | ||
| 1834 | int err; | ||
| 1835 | |||
| 1836 | err = create_filter_start(filter_str, true, &ps, &filter); | ||
| 1837 | if (!err) { | ||
| 1838 | err = replace_system_preds(system, ps, filter_str); | ||
| 1839 | if (!err) { | ||
| 1840 | /* System filters just show a default message */ | ||
| 1841 | kfree(filter->filter_string); | ||
| 1842 | filter->filter_string = NULL; | ||
| 1843 | } else { | ||
| 1844 | append_filter_err(ps, filter); | ||
| 1845 | } | ||
| 1846 | } | ||
| 1847 | create_filter_finish(ps); | ||
| 1848 | |||
| 1849 | *filterp = filter; | ||
| 1850 | return err; | ||
| 1851 | } | ||
| 1852 | |||
| 1741 | int apply_event_filter(struct ftrace_event_call *call, char *filter_string) | 1853 | int apply_event_filter(struct ftrace_event_call *call, char *filter_string) |
| 1742 | { | 1854 | { |
| 1743 | struct filter_parse_state *ps; | ||
| 1744 | struct event_filter *filter; | 1855 | struct event_filter *filter; |
| 1745 | struct event_filter *tmp; | ||
| 1746 | int err = 0; | 1856 | int err = 0; |
| 1747 | 1857 | ||
| 1748 | mutex_lock(&event_mutex); | 1858 | mutex_lock(&event_mutex); |
| @@ -1759,49 +1869,30 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) | |||
| 1759 | goto out_unlock; | 1869 | goto out_unlock; |
| 1760 | } | 1870 | } |
| 1761 | 1871 | ||
| 1762 | err = -ENOMEM; | 1872 | err = create_filter(call, filter_string, true, &filter); |
| 1763 | ps = kzalloc(sizeof(*ps), GFP_KERNEL); | ||
| 1764 | if (!ps) | ||
| 1765 | goto out_unlock; | ||
| 1766 | |||
| 1767 | filter = __alloc_filter(); | ||
| 1768 | if (!filter) { | ||
| 1769 | kfree(ps); | ||
| 1770 | goto out_unlock; | ||
| 1771 | } | ||
| 1772 | |||
| 1773 | replace_filter_string(filter, filter_string); | ||
| 1774 | |||
| 1775 | parse_init(ps, filter_ops, filter_string); | ||
| 1776 | err = filter_parse(ps); | ||
| 1777 | if (err) { | ||
| 1778 | append_filter_err(ps, filter); | ||
| 1779 | goto out; | ||
| 1780 | } | ||
| 1781 | 1873 | ||
| 1782 | err = replace_preds(call, filter, ps, filter_string, false); | ||
| 1783 | if (err) { | ||
| 1784 | filter_disable(call); | ||
| 1785 | append_filter_err(ps, filter); | ||
| 1786 | } else | ||
| 1787 | call->flags |= TRACE_EVENT_FL_FILTERED; | ||
| 1788 | out: | ||
| 1789 | /* | 1874 | /* |
| 1790 | * Always swap the call filter with the new filter | 1875 | * Always swap the call filter with the new filter |
| 1791 | * even if there was an error. If there was an error | 1876 | * even if there was an error. If there was an error |
| 1792 | * in the filter, we disable the filter and show the error | 1877 | * in the filter, we disable the filter and show the error |
| 1793 | * string | 1878 | * string |
| 1794 | */ | 1879 | */ |
| 1795 | tmp = call->filter; | 1880 | if (filter) { |
| 1796 | rcu_assign_pointer(call->filter, filter); | 1881 | struct event_filter *tmp = call->filter; |
| 1797 | if (tmp) { | 1882 | |
| 1798 | /* Make sure the call is done with the filter */ | 1883 | if (!err) |
| 1799 | synchronize_sched(); | 1884 | call->flags |= TRACE_EVENT_FL_FILTERED; |
| 1800 | __free_filter(tmp); | 1885 | else |
| 1886 | filter_disable(call); | ||
| 1887 | |||
| 1888 | rcu_assign_pointer(call->filter, filter); | ||
| 1889 | |||
| 1890 | if (tmp) { | ||
| 1891 | /* Make sure the call is done with the filter */ | ||
| 1892 | synchronize_sched(); | ||
| 1893 | __free_filter(tmp); | ||
| 1894 | } | ||
| 1801 | } | 1895 | } |
| 1802 | filter_opstack_clear(ps); | ||
| 1803 | postfix_clear(ps); | ||
| 1804 | kfree(ps); | ||
| 1805 | out_unlock: | 1896 | out_unlock: |
| 1806 | mutex_unlock(&event_mutex); | 1897 | mutex_unlock(&event_mutex); |
| 1807 | 1898 | ||
| @@ -1811,7 +1902,6 @@ out_unlock: | |||
| 1811 | int apply_subsystem_event_filter(struct event_subsystem *system, | 1902 | int apply_subsystem_event_filter(struct event_subsystem *system, |
| 1812 | char *filter_string) | 1903 | char *filter_string) |
| 1813 | { | 1904 | { |
| 1814 | struct filter_parse_state *ps; | ||
| 1815 | struct event_filter *filter; | 1905 | struct event_filter *filter; |
| 1816 | int err = 0; | 1906 | int err = 0; |
| 1817 | 1907 | ||
| @@ -1835,48 +1925,19 @@ int apply_subsystem_event_filter(struct event_subsystem *system, | |||
| 1835 | goto out_unlock; | 1925 | goto out_unlock; |
| 1836 | } | 1926 | } |
| 1837 | 1927 | ||
| 1838 | err = -ENOMEM; | 1928 | err = create_system_filter(system, filter_string, &filter); |
| 1839 | ps = kzalloc(sizeof(*ps), GFP_KERNEL); | 1929 | if (filter) { |
| 1840 | if (!ps) | 1930 | /* |
| 1841 | goto out_unlock; | 1931 | * No event actually uses the system filter |
| 1842 | 1932 | * we can free it without synchronize_sched(). | |
| 1843 | filter = __alloc_filter(); | 1933 | */ |
| 1844 | if (!filter) | 1934 | __free_filter(system->filter); |
| 1845 | goto out; | 1935 | system->filter = filter; |
| 1846 | 1936 | } | |
| 1847 | /* System filters just show a default message */ | ||
| 1848 | kfree(filter->filter_string); | ||
| 1849 | filter->filter_string = NULL; | ||
| 1850 | |||
| 1851 | /* | ||
| 1852 | * No event actually uses the system filter | ||
| 1853 | * we can free it without synchronize_sched(). | ||
| 1854 | */ | ||
| 1855 | __free_filter(system->filter); | ||
| 1856 | system->filter = filter; | ||
| 1857 | |||
| 1858 | parse_init(ps, filter_ops, filter_string); | ||
| 1859 | err = filter_parse(ps); | ||
| 1860 | if (err) | ||
| 1861 | goto err_filter; | ||
| 1862 | |||
| 1863 | err = replace_system_preds(system, ps, filter_string); | ||
| 1864 | if (err) | ||
| 1865 | goto err_filter; | ||
| 1866 | |||
| 1867 | out: | ||
| 1868 | filter_opstack_clear(ps); | ||
| 1869 | postfix_clear(ps); | ||
| 1870 | kfree(ps); | ||
| 1871 | out_unlock: | 1937 | out_unlock: |
| 1872 | mutex_unlock(&event_mutex); | 1938 | mutex_unlock(&event_mutex); |
| 1873 | 1939 | ||
| 1874 | return err; | 1940 | return err; |
| 1875 | |||
| 1876 | err_filter: | ||
| 1877 | replace_filter_string(filter, filter_string); | ||
| 1878 | append_filter_err(ps, system->filter); | ||
| 1879 | goto out; | ||
| 1880 | } | 1941 | } |
| 1881 | 1942 | ||
| 1882 | #ifdef CONFIG_PERF_EVENTS | 1943 | #ifdef CONFIG_PERF_EVENTS |
| @@ -1894,7 +1955,6 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id, | |||
| 1894 | { | 1955 | { |
| 1895 | int err; | 1956 | int err; |
| 1896 | struct event_filter *filter; | 1957 | struct event_filter *filter; |
| 1897 | struct filter_parse_state *ps; | ||
| 1898 | struct ftrace_event_call *call; | 1958 | struct ftrace_event_call *call; |
| 1899 | 1959 | ||
| 1900 | mutex_lock(&event_mutex); | 1960 | mutex_lock(&event_mutex); |
| @@ -1909,33 +1969,10 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id, | |||
| 1909 | if (event->filter) | 1969 | if (event->filter) |
| 1910 | goto out_unlock; | 1970 | goto out_unlock; |
| 1911 | 1971 | ||
| 1912 | filter = __alloc_filter(); | 1972 | err = create_filter(call, filter_str, false, &filter); |
| 1913 | if (!filter) { | ||
| 1914 | err = PTR_ERR(filter); | ||
| 1915 | goto out_unlock; | ||
| 1916 | } | ||
| 1917 | |||
| 1918 | err = -ENOMEM; | ||
| 1919 | ps = kzalloc(sizeof(*ps), GFP_KERNEL); | ||
| 1920 | if (!ps) | ||
| 1921 | goto free_filter; | ||
| 1922 | |||
| 1923 | parse_init(ps, filter_ops, filter_str); | ||
| 1924 | err = filter_parse(ps); | ||
| 1925 | if (err) | ||
| 1926 | goto free_ps; | ||
| 1927 | |||
| 1928 | err = replace_preds(call, filter, ps, filter_str, false); | ||
| 1929 | if (!err) | 1973 | if (!err) |
| 1930 | event->filter = filter; | 1974 | event->filter = filter; |
| 1931 | 1975 | else | |
| 1932 | free_ps: | ||
| 1933 | filter_opstack_clear(ps); | ||
| 1934 | postfix_clear(ps); | ||
| 1935 | kfree(ps); | ||
| 1936 | |||
| 1937 | free_filter: | ||
| 1938 | if (err) | ||
| 1939 | __free_filter(filter); | 1976 | __free_filter(filter); |
| 1940 | 1977 | ||
| 1941 | out_unlock: | 1978 | out_unlock: |
| @@ -1954,43 +1991,6 @@ out_unlock: | |||
| 1954 | #define CREATE_TRACE_POINTS | 1991 | #define CREATE_TRACE_POINTS |
| 1955 | #include "trace_events_filter_test.h" | 1992 | #include "trace_events_filter_test.h" |
| 1956 | 1993 | ||
| 1957 | static int test_get_filter(char *filter_str, struct ftrace_event_call *call, | ||
| 1958 | struct event_filter **pfilter) | ||
| 1959 | { | ||
| 1960 | struct event_filter *filter; | ||
| 1961 | struct filter_parse_state *ps; | ||
| 1962 | int err = -ENOMEM; | ||
| 1963 | |||
| 1964 | filter = __alloc_filter(); | ||
| 1965 | if (!filter) | ||
| 1966 | goto out; | ||
| 1967 | |||
| 1968 | ps = kzalloc(sizeof(*ps), GFP_KERNEL); | ||
| 1969 | if (!ps) | ||
| 1970 | goto free_filter; | ||
| 1971 | |||
| 1972 | parse_init(ps, filter_ops, filter_str); | ||
| 1973 | err = filter_parse(ps); | ||
| 1974 | if (err) | ||
| 1975 | goto free_ps; | ||
| 1976 | |||
| 1977 | err = replace_preds(call, filter, ps, filter_str, false); | ||
| 1978 | if (!err) | ||
| 1979 | *pfilter = filter; | ||
| 1980 | |||
| 1981 | free_ps: | ||
| 1982 | filter_opstack_clear(ps); | ||
| 1983 | postfix_clear(ps); | ||
| 1984 | kfree(ps); | ||
| 1985 | |||
| 1986 | free_filter: | ||
| 1987 | if (err) | ||
| 1988 | __free_filter(filter); | ||
| 1989 | |||
| 1990 | out: | ||
| 1991 | return err; | ||
| 1992 | } | ||
| 1993 | |||
| 1994 | #define DATA_REC(m, va, vb, vc, vd, ve, vf, vg, vh, nvisit) \ | 1994 | #define DATA_REC(m, va, vb, vc, vd, ve, vf, vg, vh, nvisit) \ |
| 1995 | { \ | 1995 | { \ |
| 1996 | .filter = FILTER, \ | 1996 | .filter = FILTER, \ |
| @@ -2109,12 +2109,13 @@ static __init int ftrace_test_event_filter(void) | |||
| 2109 | struct test_filter_data_t *d = &test_filter_data[i]; | 2109 | struct test_filter_data_t *d = &test_filter_data[i]; |
| 2110 | int err; | 2110 | int err; |
| 2111 | 2111 | ||
| 2112 | err = test_get_filter(d->filter, &event_ftrace_test_filter, | 2112 | err = create_filter(&event_ftrace_test_filter, d->filter, |
| 2113 | &filter); | 2113 | false, &filter); |
| 2114 | if (err) { | 2114 | if (err) { |
| 2115 | printk(KERN_INFO | 2115 | printk(KERN_INFO |
| 2116 | "Failed to get filter for '%s', err %d\n", | 2116 | "Failed to get filter for '%s', err %d\n", |
| 2117 | d->filter, err); | 2117 | d->filter, err); |
| 2118 | __free_filter(filter); | ||
| 2118 | break; | 2119 | break; |
| 2119 | } | 2120 | } |
| 2120 | 2121 | ||
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 77575b386d97..d4545f49242e 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c | |||
| @@ -13,6 +13,9 @@ | |||
| 13 | #include <linux/sysctl.h> | 13 | #include <linux/sysctl.h> |
| 14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
| 15 | #include <linux/fs.h> | 15 | #include <linux/fs.h> |
| 16 | |||
| 17 | #include <asm/setup.h> | ||
| 18 | |||
| 16 | #include "trace.h" | 19 | #include "trace.h" |
| 17 | 20 | ||
| 18 | #define STACK_TRACE_ENTRIES 500 | 21 | #define STACK_TRACE_ENTRIES 500 |
| @@ -133,7 +136,6 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip) | |||
| 133 | static struct ftrace_ops trace_ops __read_mostly = | 136 | static struct ftrace_ops trace_ops __read_mostly = |
| 134 | { | 137 | { |
| 135 | .func = stack_trace_call, | 138 | .func = stack_trace_call, |
| 136 | .flags = FTRACE_OPS_FL_GLOBAL, | ||
| 137 | }; | 139 | }; |
| 138 | 140 | ||
| 139 | static ssize_t | 141 | static ssize_t |
| @@ -311,6 +313,21 @@ static const struct file_operations stack_trace_fops = { | |||
| 311 | .release = seq_release, | 313 | .release = seq_release, |
| 312 | }; | 314 | }; |
| 313 | 315 | ||
| 316 | static int | ||
| 317 | stack_trace_filter_open(struct inode *inode, struct file *file) | ||
| 318 | { | ||
| 319 | return ftrace_regex_open(&trace_ops, FTRACE_ITER_FILTER, | ||
| 320 | inode, file); | ||
| 321 | } | ||
| 322 | |||
| 323 | static const struct file_operations stack_trace_filter_fops = { | ||
| 324 | .open = stack_trace_filter_open, | ||
| 325 | .read = seq_read, | ||
| 326 | .write = ftrace_filter_write, | ||
| 327 | .llseek = ftrace_regex_lseek, | ||
| 328 | .release = ftrace_regex_release, | ||
| 329 | }; | ||
| 330 | |||
| 314 | int | 331 | int |
| 315 | stack_trace_sysctl(struct ctl_table *table, int write, | 332 | stack_trace_sysctl(struct ctl_table *table, int write, |
| 316 | void __user *buffer, size_t *lenp, | 333 | void __user *buffer, size_t *lenp, |
| @@ -338,8 +355,13 @@ stack_trace_sysctl(struct ctl_table *table, int write, | |||
| 338 | return ret; | 355 | return ret; |
| 339 | } | 356 | } |
| 340 | 357 | ||
| 358 | static char stack_trace_filter_buf[COMMAND_LINE_SIZE+1] __initdata; | ||
| 359 | |||
| 341 | static __init int enable_stacktrace(char *str) | 360 | static __init int enable_stacktrace(char *str) |
| 342 | { | 361 | { |
| 362 | if (strncmp(str, "_filter=", 8) == 0) | ||
| 363 | strncpy(stack_trace_filter_buf, str+8, COMMAND_LINE_SIZE); | ||
| 364 | |||
| 343 | stack_tracer_enabled = 1; | 365 | stack_tracer_enabled = 1; |
| 344 | last_stack_tracer_enabled = 1; | 366 | last_stack_tracer_enabled = 1; |
| 345 | return 1; | 367 | return 1; |
| @@ -358,6 +380,12 @@ static __init int stack_trace_init(void) | |||
| 358 | trace_create_file("stack_trace", 0444, d_tracer, | 380 | trace_create_file("stack_trace", 0444, d_tracer, |
| 359 | NULL, &stack_trace_fops); | 381 | NULL, &stack_trace_fops); |
| 360 | 382 | ||
| 383 | trace_create_file("stack_trace_filter", 0444, d_tracer, | ||
| 384 | NULL, &stack_trace_filter_fops); | ||
| 385 | |||
| 386 | if (stack_trace_filter_buf[0]) | ||
| 387 | ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1); | ||
| 388 | |||
| 361 | if (stack_tracer_enabled) | 389 | if (stack_tracer_enabled) |
| 362 | register_ftrace_function(&trace_ops); | 390 | register_ftrace_function(&trace_ops); |
| 363 | 391 | ||
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 42fa9ad0a810..bec7b5b53e03 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -242,10 +242,10 @@ struct workqueue_struct { | |||
| 242 | 242 | ||
| 243 | int nr_drainers; /* W: drain in progress */ | 243 | int nr_drainers; /* W: drain in progress */ |
| 244 | int saved_max_active; /* W: saved cwq max_active */ | 244 | int saved_max_active; /* W: saved cwq max_active */ |
| 245 | const char *name; /* I: workqueue name */ | ||
| 246 | #ifdef CONFIG_LOCKDEP | 245 | #ifdef CONFIG_LOCKDEP |
| 247 | struct lockdep_map lockdep_map; | 246 | struct lockdep_map lockdep_map; |
| 248 | #endif | 247 | #endif |
| 248 | char name[]; /* I: workqueue name */ | ||
| 249 | }; | 249 | }; |
| 250 | 250 | ||
| 251 | struct workqueue_struct *system_wq __read_mostly; | 251 | struct workqueue_struct *system_wq __read_mostly; |
| @@ -2954,14 +2954,29 @@ static int wq_clamp_max_active(int max_active, unsigned int flags, | |||
| 2954 | return clamp_val(max_active, 1, lim); | 2954 | return clamp_val(max_active, 1, lim); |
| 2955 | } | 2955 | } |
| 2956 | 2956 | ||
| 2957 | struct workqueue_struct *__alloc_workqueue_key(const char *name, | 2957 | struct workqueue_struct *__alloc_workqueue_key(const char *fmt, |
| 2958 | unsigned int flags, | 2958 | unsigned int flags, |
| 2959 | int max_active, | 2959 | int max_active, |
| 2960 | struct lock_class_key *key, | 2960 | struct lock_class_key *key, |
| 2961 | const char *lock_name) | 2961 | const char *lock_name, ...) |
| 2962 | { | 2962 | { |
| 2963 | va_list args, args1; | ||
| 2963 | struct workqueue_struct *wq; | 2964 | struct workqueue_struct *wq; |
| 2964 | unsigned int cpu; | 2965 | unsigned int cpu; |
| 2966 | size_t namelen; | ||
| 2967 | |||
| 2968 | /* determine namelen, allocate wq and format name */ | ||
| 2969 | va_start(args, lock_name); | ||
| 2970 | va_copy(args1, args); | ||
| 2971 | namelen = vsnprintf(NULL, 0, fmt, args) + 1; | ||
| 2972 | |||
| 2973 | wq = kzalloc(sizeof(*wq) + namelen, GFP_KERNEL); | ||
| 2974 | if (!wq) | ||
| 2975 | goto err; | ||
| 2976 | |||
| 2977 | vsnprintf(wq->name, namelen, fmt, args1); | ||
| 2978 | va_end(args); | ||
| 2979 | va_end(args1); | ||
| 2965 | 2980 | ||
| 2966 | /* | 2981 | /* |
| 2967 | * Workqueues which may be used during memory reclaim should | 2982 | * Workqueues which may be used during memory reclaim should |
| @@ -2978,12 +2993,9 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name, | |||
| 2978 | flags |= WQ_HIGHPRI; | 2993 | flags |= WQ_HIGHPRI; |
| 2979 | 2994 | ||
| 2980 | max_active = max_active ?: WQ_DFL_ACTIVE; | 2995 | max_active = max_active ?: WQ_DFL_ACTIVE; |
| 2981 | max_active = wq_clamp_max_active(max_active, flags, name); | 2996 | max_active = wq_clamp_max_active(max_active, flags, wq->name); |
| 2982 | |||
| 2983 | wq = kzalloc(sizeof(*wq), GFP_KERNEL); | ||
| 2984 | if (!wq) | ||
| 2985 | goto err; | ||
| 2986 | 2997 | ||
| 2998 | /* init wq */ | ||
| 2987 | wq->flags = flags; | 2999 | wq->flags = flags; |
| 2988 | wq->saved_max_active = max_active; | 3000 | wq->saved_max_active = max_active; |
| 2989 | mutex_init(&wq->flush_mutex); | 3001 | mutex_init(&wq->flush_mutex); |
| @@ -2991,7 +3003,6 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name, | |||
| 2991 | INIT_LIST_HEAD(&wq->flusher_queue); | 3003 | INIT_LIST_HEAD(&wq->flusher_queue); |
| 2992 | INIT_LIST_HEAD(&wq->flusher_overflow); | 3004 | INIT_LIST_HEAD(&wq->flusher_overflow); |
| 2993 | 3005 | ||
| 2994 | wq->name = name; | ||
| 2995 | lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); | 3006 | lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); |
| 2996 | INIT_LIST_HEAD(&wq->list); | 3007 | INIT_LIST_HEAD(&wq->list); |
| 2997 | 3008 | ||
| @@ -3020,7 +3031,8 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name, | |||
| 3020 | if (!rescuer) | 3031 | if (!rescuer) |
| 3021 | goto err; | 3032 | goto err; |
| 3022 | 3033 | ||
| 3023 | rescuer->task = kthread_create(rescuer_thread, wq, "%s", name); | 3034 | rescuer->task = kthread_create(rescuer_thread, wq, "%s", |
| 3035 | wq->name); | ||
| 3024 | if (IS_ERR(rescuer->task)) | 3036 | if (IS_ERR(rescuer->task)) |
| 3025 | goto err; | 3037 | goto err; |
| 3026 | 3038 | ||
