Diffstat (limited to 'kernel'): 63 files changed, 1061 insertions(+), 576 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index 353d3fe8ba33..85cbfb31e73e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -107,6 +107,7 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
 obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
 obj-$(CONFIG_PADATA) += padata.o
+obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
 
 ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/audit.c b/kernel/audit.c
index e4956244ae50..939500317066 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -74,6 +74,8 @@ static int audit_initialized;
 int audit_enabled;
 int audit_ever_enabled;
 
+EXPORT_SYMBOL_GPL(audit_enabled);
+
 /* Default state when kernel boots without any parameters. */
 static int audit_default;
 
@@ -671,9 +673,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 
 	pid = NETLINK_CREDS(skb)->pid;
 	uid = NETLINK_CREDS(skb)->uid;
-	loginuid = NETLINK_CB(skb).loginuid;
-	sessionid = NETLINK_CB(skb).sessionid;
-	sid = NETLINK_CB(skb).sid;
+	loginuid = audit_get_loginuid(current);
+	sessionid = audit_get_sessionid(current);
+	security_task_getsecid(current, &sid);
 	seq = nlh->nlmsg_seq;
 	data = NLMSG_DATA(nlh);
 
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index add2819af71b..f8277c80d678 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1238,6 +1238,7 @@ static int audit_filter_user_rules(struct netlink_skb_parms *cb,
 	for (i = 0; i < rule->field_count; i++) {
 		struct audit_field *f = &rule->fields[i];
 		int result = 0;
+		u32 sid;
 
 		switch (f->type) {
 		case AUDIT_PID:
@@ -1250,19 +1251,22 @@ static int audit_filter_user_rules(struct netlink_skb_parms *cb,
 			result = audit_comparator(cb->creds.gid, f->op, f->val);
 			break;
 		case AUDIT_LOGINUID:
-			result = audit_comparator(cb->loginuid, f->op, f->val);
+			result = audit_comparator(audit_get_loginuid(current),
+						  f->op, f->val);
 			break;
 		case AUDIT_SUBJ_USER:
 		case AUDIT_SUBJ_ROLE:
 		case AUDIT_SUBJ_TYPE:
 		case AUDIT_SUBJ_SEN:
 		case AUDIT_SUBJ_CLR:
-			if (f->lsm_rule)
-				result = security_audit_rule_match(cb->sid,
+			if (f->lsm_rule) {
+				security_task_getsecid(current, &sid);
+				result = security_audit_rule_match(sid,
 								   f->type,
 								   f->op,
 								   f->lsm_rule,
 								   NULL);
+			}
 			break;
 		}
 
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 98a51f26c136..0c9b862292b2 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -9,11 +9,13 @@
 #include <linux/page-flags.h>
 #include <linux/mmzone.h>
 #include <linux/kbuild.h>
+#include <linux/page_cgroup.h>
 
 void foo(void)
 {
 	/* The enum constants to put into include/generated/bounds.h */
 	DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
 	DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
+	DEFINE(NR_PCG_FLAGS, __NR_PCG_FLAGS);
 	/* End of constants */
 }
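
bounds.c is never linked into the kernel image: kbuild compiles it to assembly and scrapes the DEFINE() markers out to produce include/generated/bounds.h. A sketch of the resulting header, with illustrative values since the real ones depend on the configuration:

    /* include/generated/bounds.h -- sketch; values vary by config */
    #ifndef __LINUX_BOUNDS_H__
    #define __LINUX_BOUNDS_H__

    #define NR_PAGEFLAGS 25 /* __NR_PAGEFLAGS */
    #define MAX_NR_ZONES 4 /* __MAX_NR_ZONES */
    #define NR_PCG_FLAGS 3 /* __NR_PCG_FLAGS */

    #endif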
diff --git a/kernel/capability.c b/kernel/capability.c
index 9e9385f132c8..bf0c734d0c12 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -14,6 +14,7 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/pid_namespace.h>
+#include <linux/user_namespace.h>
 #include <asm/uaccess.h>
 
 /*
@@ -290,6 +291,60 @@ error:
 }
 
 /**
+ * has_capability - Does a task have a capability in init_user_ns
+ * @t: The task in question
+ * @cap: The capability to be tested for
+ *
+ * Return true if the specified task has the given superior capability
+ * currently in effect to the initial user namespace, false if not.
+ *
+ * Note that this does not set PF_SUPERPRIV on the task.
+ */
+bool has_capability(struct task_struct *t, int cap)
+{
+	int ret = security_real_capable(t, &init_user_ns, cap);
+
+	return (ret == 0);
+}
+
+/**
+ * has_ns_capability - Does a task have a capability in a specific user ns
+ * @t: The task in question
+ * @ns: target user namespace
+ * @cap: The capability to be tested for
+ *
+ * Return true if the specified task has the given superior capability
+ * currently in effect to the specified user namespace, false if not.
+ *
+ * Note that this does not set PF_SUPERPRIV on the task.
+ */
+bool has_ns_capability(struct task_struct *t,
+		       struct user_namespace *ns, int cap)
+{
+	int ret = security_real_capable(t, ns, cap);
+
+	return (ret == 0);
+}
+
+/**
+ * has_capability_noaudit - Does a task have a capability (unaudited)
+ * @t: The task in question
+ * @cap: The capability to be tested for
+ *
+ * Return true if the specified task has the given superior capability
+ * currently in effect to init_user_ns, false if not. Don't write an
+ * audit message for the check.
+ *
+ * Note that this does not set PF_SUPERPRIV on the task.
+ */
+bool has_capability_noaudit(struct task_struct *t, int cap)
+{
+	int ret = security_real_capable_noaudit(t, &init_user_ns, cap);
+
+	return (ret == 0);
+}
+
+/**
  * capable - Determine if the current task has a superior capability in effect
  * @cap: The capability to be tested for
  *
@@ -299,17 +354,48 @@ error:
  * This sets PF_SUPERPRIV on the task if the capability is available on the
  * assumption that it's about to be used.
  */
-int capable(int cap)
+bool capable(int cap)
+{
+	return ns_capable(&init_user_ns, cap);
+}
+EXPORT_SYMBOL(capable);
+
+/**
+ * ns_capable - Determine if the current task has a superior capability in effect
+ * @ns: The user namespace we want the capability in
+ * @cap: The capability to be tested for
+ *
+ * Return true if the current task has the given superior capability currently
+ * available for use, false if not.
+ *
+ * This sets PF_SUPERPRIV on the task if the capability is available on the
+ * assumption that it's about to be used.
+ */
+bool ns_capable(struct user_namespace *ns, int cap)
 {
 	if (unlikely(!cap_valid(cap))) {
 		printk(KERN_CRIT "capable() called with invalid cap=%u\n", cap);
 		BUG();
 	}
 
-	if (security_capable(current_cred(), cap) == 0) {
+	if (security_capable(ns, current_cred(), cap) == 0) {
 		current->flags |= PF_SUPERPRIV;
-		return 1;
+		return true;
 	}
-	return 0;
+	return false;
 }
-EXPORT_SYMBOL(capable);
+EXPORT_SYMBOL(ns_capable);
+
+/**
+ * task_ns_capable - Determine whether current task has a superior
+ * capability targeted at a specific task's user namespace.
+ * @t: The task whose user namespace is targeted.
+ * @cap: The capability in question.
+ *
+ * Return true if it does, false otherwise.
+ */
+bool task_ns_capable(struct task_struct *t, int cap)
+{
+	return ns_capable(task_cred_xxx(t, user)->user_ns, cap);
+}
+EXPORT_SYMBOL(task_ns_capable);
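
To see what changes for callers: capable() keeps its old meaning (privilege in the initial user namespace), while the new helpers let a check target the namespace that actually owns the object. A minimal sketch; the function and capability below are illustrative, not part of this patch:

    /* Hypothetical permission check against a target task. */
    static int example_may_signal(struct task_struct *target)
    {
    	/* Old style: only a task privileged in init_user_ns passes. */
    	if (capable(CAP_KILL))
    		return 0;

    	/* New style: a task privileged in the user namespace owning
    	 * @target (e.g. root inside a container) passes too. */
    	if (task_ns_capable(target, CAP_KILL))
    		return 0;

    	return -EPERM;
    }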
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 95362d15128c..e31b220a743d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1813,10 +1813,8 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 
 	/* Update the css_set linked lists if we're using them */
 	write_lock(&css_set_lock);
-	if (!list_empty(&tsk->cg_list)) {
-		list_del(&tsk->cg_list);
-		list_add(&tsk->cg_list, &newcg->tasks);
-	}
+	if (!list_empty(&tsk->cg_list))
+		list_move(&tsk->cg_list, &newcg->tasks);
 	write_unlock(&css_set_lock);
 
 	for_each_subsys(root, ss) {
@@ -3655,12 +3653,12 @@ again:
 	spin_lock(&release_list_lock);
 	set_bit(CGRP_REMOVED, &cgrp->flags);
 	if (!list_empty(&cgrp->release_list))
-		list_del(&cgrp->release_list);
+		list_del_init(&cgrp->release_list);
 	spin_unlock(&release_list_lock);
 
 	cgroup_lock_hierarchy(cgrp->root);
 	/* delete this cgroup from parent->children */
-	list_del(&cgrp->sibling);
+	list_del_init(&cgrp->sibling);
 	cgroup_unlock_hierarchy(cgrp->root);
 
 	d = dget(cgrp->dentry);
@@ -3879,7 +3877,7 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
 	subsys[ss->subsys_id] = NULL;
 
 	/* remove subsystem from rootnode's list of subsystems */
-	list_del(&ss->sibling);
+	list_del_init(&ss->sibling);
 
 	/*
 	 * disentangle the css from all css_sets attached to the dummytop. as
@@ -4241,7 +4239,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
 	if (!list_empty(&tsk->cg_list)) {
 		write_lock(&css_set_lock);
 		if (!list_empty(&tsk->cg_list))
-			list_del(&tsk->cg_list);
+			list_del_init(&tsk->cg_list);
 		write_unlock(&css_set_lock);
 	}
 
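
The list_del() to list_del_init() conversions matter because each of these entries is re-tested with list_empty() later, and list_move() replaces an open-coded list_del()+list_add() pair. A sketch of the distinction, using the same linux/list.h primitives:

    struct list_head entry;	/* assume currently on some list */

    list_del(&entry);
    /* entry.next/.prev now hold the LIST_POISON1/LIST_POISON2 values;
     * a later list_empty(&entry) is not meaningful. */

    list_del_init(&entry);
    /* entry points back at itself, so list_empty(&entry) is reliably
     * true and the node may be re-tested or re-added safely. */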
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 156cc5556140..c95fc4df0faa 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -160,7 +160,6 @@ static void cpu_notify_nofail(unsigned long val, void *v)
 {
 	BUG_ON(cpu_notify(val, v));
 }
-
 EXPORT_SYMBOL(register_cpu_notifier);
 
 void __ref unregister_cpu_notifier(struct notifier_block *nb)
@@ -205,7 +204,6 @@ static int __ref take_cpu_down(void *_param)
 		return err;
 
 	cpu_notify(CPU_DYING | param->mod, param->hcpu);
-
 	return 0;
 }
 
@@ -227,6 +225,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 		return -EINVAL;
 
 	cpu_hotplug_begin();
+
 	err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
 	if (err) {
 		nr_calls--;
@@ -304,7 +303,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
 	ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls);
 	if (ret) {
 		nr_calls--;
-		printk("%s: attempt to bring up CPU %u failed\n",
+		printk(KERN_WARNING "%s: attempt to bring up CPU %u failed\n",
 				__func__, cpu);
 		goto out_notify;
 	}
@@ -450,14 +449,14 @@ void __ref enable_nonboot_cpus(void)
 	if (cpumask_empty(frozen_cpus))
 		goto out;
 
-	printk("Enabling non-boot CPUs ...\n");
+	printk(KERN_INFO "Enabling non-boot CPUs ...\n");
 
 	arch_enable_nonboot_cpus_begin();
 
 	for_each_cpu(cpu, frozen_cpus) {
 		error = _cpu_up(cpu, 1);
 		if (!error) {
-			printk("CPU%d is up\n", cpu);
+			printk(KERN_INFO "CPU%d is up\n", cpu);
 			continue;
 		}
 		printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
@@ -509,7 +508,7 @@ void __cpuinit notify_cpu_starting(unsigned int cpu)
  */
 
 /* cpu_bit_bitmap[0] is empty - so we can back into it */
-#define MASK_DECLARE_1(x)	[x+1][0] = 1UL << (x)
+#define MASK_DECLARE_1(x)	[x+1][0] = (1UL << (x))
 #define MASK_DECLARE_2(x)	MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
 #define MASK_DECLARE_4(x)	MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
 #define MASK_DECLARE_8(x)	MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
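
For reference, the MASK_DECLARE_*() macros expand to designated initializers for cpu_bit_bitmap, so row x+1 carries exactly the bit for CPU x; the change here only adds parentheses so the shift expression stays self-contained wherever the macro lands. A sketch of what MASK_DECLARE_4(0) contributes:

    /* Sketch: expansion of MASK_DECLARE_4(0) after this patch. */
    static const unsigned long example_rows[5][1] = {
    	[1][0] = (1UL << (0)),
    	[2][0] = (1UL << (1)),
    	[3][0] = (1UL << (2)),
    	[4][0] = (1UL << (3)),
    };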
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index e92e98189032..33eee16addb8 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1015,17 +1015,12 @@ static void cpuset_change_nodemask(struct task_struct *p,
 	struct cpuset *cs;
 	int migrate;
 	const nodemask_t *oldmem = scan->data;
-	NODEMASK_ALLOC(nodemask_t, newmems, GFP_KERNEL);
-
-	if (!newmems)
-		return;
+	static nodemask_t newmems;	/* protected by cgroup_mutex */
 
 	cs = cgroup_cs(scan->cg);
-	guarantee_online_mems(cs, newmems);
-
-	cpuset_change_task_nodemask(p, newmems);
-
-	NODEMASK_FREE(newmems);
+	guarantee_online_mems(cs, &newmems);
+
+	cpuset_change_task_nodemask(p, &newmems);
 
 	mm = get_task_mm(p);
 	if (!mm)
@@ -1438,44 +1433,35 @@ static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
 	struct mm_struct *mm;
 	struct cpuset *cs = cgroup_cs(cont);
 	struct cpuset *oldcs = cgroup_cs(oldcont);
-	NODEMASK_ALLOC(nodemask_t, from, GFP_KERNEL);
-	NODEMASK_ALLOC(nodemask_t, to, GFP_KERNEL);
-
-	if (from == NULL || to == NULL)
-		goto alloc_fail;
+	static nodemask_t to;	/* protected by cgroup_mutex */
 
 	if (cs == &top_cpuset) {
 		cpumask_copy(cpus_attach, cpu_possible_mask);
 	} else {
 		guarantee_online_cpus(cs, cpus_attach);
 	}
-	guarantee_online_mems(cs, to);
+	guarantee_online_mems(cs, &to);
 
 	/* do per-task migration stuff possibly for each in the threadgroup */
-	cpuset_attach_task(tsk, to, cs);
+	cpuset_attach_task(tsk, &to, cs);
 	if (threadgroup) {
 		struct task_struct *c;
 		rcu_read_lock();
 		list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-			cpuset_attach_task(c, to, cs);
+			cpuset_attach_task(c, &to, cs);
 		}
 		rcu_read_unlock();
 	}
 
 	/* change mm; only needs to be done once even if threadgroup */
-	*from = oldcs->mems_allowed;
-	*to = cs->mems_allowed;
+	to = cs->mems_allowed;
 	mm = get_task_mm(tsk);
 	if (mm) {
-		mpol_rebind_mm(mm, to);
+		mpol_rebind_mm(mm, &to);
 		if (is_memory_migrate(cs))
-			cpuset_migrate_mm(mm, from, to);
+			cpuset_migrate_mm(mm, &oldcs->mems_allowed, &to);
 		mmput(mm);
 	}
-
-alloc_fail:
-	NODEMASK_FREE(from);
-	NODEMASK_FREE(to);
 }
 
 /* The various types of files and directories in a cpuset file system */
@@ -1610,34 +1596,26 @@ out:
  * across a page fault.
  */
 
-static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
+static size_t cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
 {
-	int ret;
+	size_t count;
 
 	mutex_lock(&callback_mutex);
-	ret = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
+	count = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
 	mutex_unlock(&callback_mutex);
 
-	return ret;
+	return count;
 }
 
-static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
+static size_t cpuset_sprintf_memlist(char *page, struct cpuset *cs)
 {
-	NODEMASK_ALLOC(nodemask_t, mask, GFP_KERNEL);
-	int retval;
-
-	if (mask == NULL)
-		return -ENOMEM;
+	size_t count;
 
 	mutex_lock(&callback_mutex);
-	*mask = cs->mems_allowed;
+	count = nodelist_scnprintf(page, PAGE_SIZE, cs->mems_allowed);
 	mutex_unlock(&callback_mutex);
 
-	retval = nodelist_scnprintf(page, PAGE_SIZE, *mask);
-
-	NODEMASK_FREE(mask);
-
-	return retval;
+	return count;
 }
 
 static ssize_t cpuset_common_file_read(struct cgroup *cont,
@@ -1862,8 +1840,10 @@ static void cpuset_post_clone(struct cgroup_subsys *ss,
 	cs = cgroup_cs(cgroup);
 	parent_cs = cgroup_cs(parent);
 
+	mutex_lock(&callback_mutex);
 	cs->mems_allowed = parent_cs->mems_allowed;
 	cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed);
+	mutex_unlock(&callback_mutex);
 	return;
 }
 
@@ -2066,10 +2046,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
 	struct cpuset *cp;	/* scans cpusets being updated */
 	struct cpuset *child;	/* scans child cpusets of cp */
 	struct cgroup *cont;
-	NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL);
-
-	if (oldmems == NULL)
-		return;
+	static nodemask_t oldmems;	/* protected by cgroup_mutex */
 
 	list_add_tail((struct list_head *)&root->stack_list, &queue);
 
@@ -2086,7 +2063,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
 		    nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
 			continue;
 
-		*oldmems = cp->mems_allowed;
+		oldmems = cp->mems_allowed;
 
 		/* Remove offline cpus and mems from this cpuset. */
 		mutex_lock(&callback_mutex);
@@ -2102,10 +2079,9 @@ static void scan_for_empty_cpusets(struct cpuset *root)
 			remove_tasks_in_empty_cpuset(cp);
 		else {
 			update_tasks_cpumask(cp, NULL);
-			update_tasks_nodemask(cp, oldmems, NULL);
+			update_tasks_nodemask(cp, &oldmems, NULL);
 		}
 	}
-	NODEMASK_FREE(oldmems);
 }
 
 /*
@@ -2147,19 +2123,16 @@ void cpuset_update_active_cpus(void)
 static int cpuset_track_online_nodes(struct notifier_block *self,
 				unsigned long action, void *arg)
 {
-	NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL);
-
-	if (oldmems == NULL)
-		return NOTIFY_DONE;
+	static nodemask_t oldmems;	/* protected by cgroup_mutex */
 
 	cgroup_lock();
 	switch (action) {
 	case MEM_ONLINE:
-		*oldmems = top_cpuset.mems_allowed;
+		oldmems = top_cpuset.mems_allowed;
 		mutex_lock(&callback_mutex);
 		top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
 		mutex_unlock(&callback_mutex);
-		update_tasks_nodemask(&top_cpuset, oldmems, NULL);
+		update_tasks_nodemask(&top_cpuset, &oldmems, NULL);
 		break;
 	case MEM_OFFLINE:
 		/*
@@ -2173,7 +2146,6 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
 	}
 	cgroup_unlock();
 
-	NODEMASK_FREE(oldmems);
 	return NOTIFY_OK;
 }
 #endif
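
All of the cpuset conversions above follow one pattern: NODEMASK_ALLOC()/NODEMASK_FREE() pairs (used because nodemask_t can be too large for the stack) become function-local statics, which is safe only because cgroup_mutex already serializes every caller. A sketch of the pattern; the function itself is illustrative:

    static void example_refresh_mems(struct cpuset *cs)
    {
    	static nodemask_t newmems;	/* protected by cgroup_mutex */

    	/* No allocation, no failure path; safe because a concurrent
    	 * caller cannot exist while cgroup_mutex is held. */
    	guarantee_online_mems(cs, &newmems);
    }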
diff --git a/kernel/crash_dump.c b/kernel/crash_dump.c
new file mode 100644
index 000000000000..5f85690285d4
--- /dev/null
+++ b/kernel/crash_dump.c
@@ -0,0 +1,34 @@
+#include <linux/kernel.h>
+#include <linux/crash_dump.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+
+/*
+ * If we have booted due to a crash, max_pfn will be a very low value. We need
+ * to know the amount of memory that the previous kernel used.
+ */
+unsigned long saved_max_pfn;
+
+/*
+ * stores the physical address of elf header of crash image
+ *
+ * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
+ * is_kdump_kernel() to determine if we are booting after a panic. Hence put
+ * it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
+ */
+unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
+
+/*
+ * elfcorehdr= specifies the location of elf core header stored by the crashed
+ * kernel. This option will be passed by kexec loader to the capture kernel.
+ */
+static int __init setup_elfcorehdr(char *arg)
+{
+	char *end;
+	if (!arg)
+		return -EINVAL;
+	elfcorehdr_addr = memparse(arg, &end);
+	return end > arg ? 0 : -EINVAL;
+}
+early_param("elfcorehdr", setup_elfcorehdr);
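
In practice the kexec loader appends elfcorehdr= to the capture kernel's command line; since the value goes through memparse(), K/M/G suffixes are accepted. Hypothetical examples (the address is illustrative):

    elfcorehdr=0x2f000000      raw physical address
    elfcorehdr=752M            the same address written with a suffix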
diff --git a/kernel/cred.c b/kernel/cred.c
index 2343c132c5a7..5557b55048df 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -741,6 +741,12 @@ int set_create_files_as(struct cred *new, struct inode *inode)
 }
 EXPORT_SYMBOL(set_create_files_as);
 
+struct user_namespace *current_user_ns(void)
+{
+	return _current_user_ns();
+}
+EXPORT_SYMBOL(current_user_ns);
+
 #ifdef CONFIG_DEBUG_CREDENTIALS
 
 bool creds_are_invalid(const struct cred *cred)
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index 481a7bd2dfe7..a11db956dd62 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -1093,3 +1093,33 @@ int gdbstub_state(struct kgdb_state *ks, char *cmd)
 	put_packet(remcom_out_buffer);
 	return 0;
 }
+
+/**
+ * gdbstub_exit - Send an exit message to GDB
+ * @status: The exit code to report.
+ */
+void gdbstub_exit(int status)
+{
+	unsigned char checksum, ch, buffer[3];
+	int loop;
+
+	buffer[0] = 'W';
+	buffer[1] = hex_asc_hi(status);
+	buffer[2] = hex_asc_lo(status);
+
+	dbg_io_ops->write_char('$');
+	checksum = 0;
+
+	for (loop = 0; loop < 3; loop++) {
+		ch = buffer[loop];
+		checksum += ch;
+		dbg_io_ops->write_char(ch);
+	}
+
+	dbg_io_ops->write_char('#');
+	dbg_io_ops->write_char(hex_asc_hi(checksum));
+	dbg_io_ops->write_char(hex_asc_lo(checksum));
+
+	/* make sure the output is flushed, lest the bootloader clobber it */
+	dbg_io_ops->flush();
+}
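
For reference, gdbstub_exit() hand-assembles a GDB remote serial protocol packet: '$', the payload ('W' plus the two hex digits of the status), '#', and a checksum that is the modulo-256 sum of the payload bytes. For example:

    gdbstub_exit(0)  emits  $W00#b7    (0x57 + 0x30 + 0x30 = 0xb7)
    gdbstub_exit(1)  emits  $W01#b8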
diff --git a/kernel/exit.c b/kernel/exit.c
index f9a45ebcc7b1..6a488ad2dce5 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -908,6 +908,7 @@ NORET_TYPE void do_exit(long code)
 	profile_task_exit(tsk);
 
 	WARN_ON(atomic_read(&tsk->fs_excl));
+	WARN_ON(blk_needs_flush_plug(tsk));
 
 	if (unlikely(in_interrupt()))
 		panic("Aiee, killing interrupt handler!");
diff --git a/kernel/fork.c b/kernel/fork.c
index 25e429152ddc..e7548dee636b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -40,6 +40,7 @@
 #include <linux/tracehook.h>
 #include <linux/futex.h>
 #include <linux/compat.h>
+#include <linux/kthread.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/rcupdate.h>
 #include <linux/ptrace.h>
@@ -109,20 +110,25 @@ int nr_processes(void)
 }
 
 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
-# define alloc_task_struct()	kmem_cache_alloc(task_struct_cachep, GFP_KERNEL)
-# define free_task_struct(tsk)	kmem_cache_free(task_struct_cachep, (tsk))
+# define alloc_task_struct_node(node)		\
+		kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node)
+# define free_task_struct(tsk)			\
+		kmem_cache_free(task_struct_cachep, (tsk))
 static struct kmem_cache *task_struct_cachep;
 #endif
 
 #ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
-static inline struct thread_info *alloc_thread_info(struct task_struct *tsk)
+static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
+						  int node)
 {
 #ifdef CONFIG_DEBUG_STACK_USAGE
 	gfp_t mask = GFP_KERNEL | __GFP_ZERO;
 #else
 	gfp_t mask = GFP_KERNEL;
 #endif
-	return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER);
+	struct page *page = alloc_pages_node(node, mask, THREAD_SIZE_ORDER);
+
+	return page ? page_address(page) : NULL;
 }
 
 static inline void free_thread_info(struct thread_info *ti)
@@ -193,6 +199,7 @@ void __put_task_struct(struct task_struct *tsk)
 	if (!profile_handoff_task(tsk))
 		free_task(tsk);
 }
+EXPORT_SYMBOL_GPL(__put_task_struct);
 
 /*
  * macro override instead of weak attribute alias, to workaround
@@ -248,16 +255,16 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 	struct task_struct *tsk;
 	struct thread_info *ti;
 	unsigned long *stackend;
-
+	int node = tsk_fork_get_node(orig);
 	int err;
 
 	prepare_to_copy(orig);
 
-	tsk = alloc_task_struct();
+	tsk = alloc_task_struct_node(node);
 	if (!tsk)
 		return NULL;
 
-	ti = alloc_thread_info(tsk);
+	ti = alloc_thread_info_node(tsk, node);
 	if (!ti) {
 		free_task_struct(tsk);
 		return NULL;
@@ -1180,12 +1187,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		pid = alloc_pid(p->nsproxy->pid_ns);
 		if (!pid)
 			goto bad_fork_cleanup_io;
-
-		if (clone_flags & CLONE_NEWPID) {
-			retval = pid_ns_prepare_proc(p->nsproxy->pid_ns);
-			if (retval < 0)
-				goto bad_fork_free_pid;
-		}
 	}
 
 	p->pid = pid_nr(pid);
@@ -1204,6 +1205,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	 * Clear TID on mm_release()?
 	 */
 	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
+#ifdef CONFIG_BLOCK
+	p->plug = NULL;
+#endif
 #ifdef CONFIG_FUTEX
 	p->robust_list = NULL;
 #ifdef CONFIG_COMPAT
@@ -1289,7 +1293,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	tracehook_finish_clone(p, clone_flags, trace);
 
 	if (thread_group_leader(p)) {
-		if (clone_flags & CLONE_NEWPID)
+		if (is_child_reaper(pid))
 			p->nsproxy->pid_ns->child_reaper = p;
 
 		p->signal->leader_pid = pid;
@@ -1512,38 +1516,24 @@ void __init proc_caches_init(void)
 }
 
 /*
- * Check constraints on flags passed to the unshare system call and
- * force unsharing of additional process context as appropriate.
+ * Check constraints on flags passed to the unshare system call.
  */
-static void check_unshare_flags(unsigned long *flags_ptr)
+static int check_unshare_flags(unsigned long unshare_flags)
 {
+	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
+				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
+				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
+		return -EINVAL;
 	/*
-	 * If unsharing a thread from a thread group, must also
-	 * unshare vm.
+	 * Not implemented, but pretend it works if there is nothing to
+	 * unshare. Note that unsharing CLONE_THREAD or CLONE_SIGHAND
+	 * needs to unshare vm.
 	 */
-	if (*flags_ptr & CLONE_THREAD)
-		*flags_ptr |= CLONE_VM;
-
-	/*
-	 * If unsharing vm, must also unshare signal handlers.
-	 */
-	if (*flags_ptr & CLONE_VM)
-		*flags_ptr |= CLONE_SIGHAND;
-
-	/*
-	 * If unsharing namespace, must also unshare filesystem information.
-	 */
-	if (*flags_ptr & CLONE_NEWNS)
-		*flags_ptr |= CLONE_FS;
-}
-
-/*
- * Unsharing of tasks created with CLONE_THREAD is not supported yet
- */
-static int unshare_thread(unsigned long unshare_flags)
-{
-	if (unshare_flags & CLONE_THREAD)
-		return -EINVAL;
+	if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) {
+		/* FIXME: get_task_mm() increments ->mm_users */
+		if (atomic_read(&current->mm->mm_users) > 1)
+			return -EINVAL;
+	}
 
 	return 0;
 }
@@ -1570,34 +1560,6 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
 }
 
 /*
- * Unsharing of sighand is not supported yet
- */
-static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
-{
-	struct sighand_struct *sigh = current->sighand;
-
-	if ((unshare_flags & CLONE_SIGHAND) && atomic_read(&sigh->count) > 1)
-		return -EINVAL;
-	else
-		return 0;
-}
-
-/*
- * Unshare vm if it is being shared
- */
-static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp)
-{
-	struct mm_struct *mm = current->mm;
-
-	if ((unshare_flags & CLONE_VM) &&
-	    (mm && atomic_read(&mm->mm_users) > 1)) {
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-/*
  * Unshare file descriptor table if it is being shared
  */
 static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
@@ -1625,45 +1587,37 @@ static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
  */
 SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 {
-	int err = 0;
 	struct fs_struct *fs, *new_fs = NULL;
-	struct sighand_struct *new_sigh = NULL;
-	struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
 	struct files_struct *fd, *new_fd = NULL;
 	struct nsproxy *new_nsproxy = NULL;
 	int do_sysvsem = 0;
+	int err;
 
-	check_unshare_flags(&unshare_flags);
-
-	/* Return -EINVAL for all unsupported flags */
-	err = -EINVAL;
-	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
-				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
-				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
+	err = check_unshare_flags(unshare_flags);
+	if (err)
 		goto bad_unshare_out;
 
 	/*
+	 * If unsharing namespace, must also unshare filesystem information.
+	 */
+	if (unshare_flags & CLONE_NEWNS)
+		unshare_flags |= CLONE_FS;
+	/*
 	 * CLONE_NEWIPC must also detach from the undolist: after switching
 	 * to a new ipc namespace, the semaphore arrays from the old
 	 * namespace are unreachable.
 	 */
 	if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM))
 		do_sysvsem = 1;
-	if ((err = unshare_thread(unshare_flags)))
-		goto bad_unshare_out;
 	if ((err = unshare_fs(unshare_flags, &new_fs)))
-		goto bad_unshare_cleanup_thread;
-	if ((err = unshare_sighand(unshare_flags, &new_sigh)))
-		goto bad_unshare_cleanup_fs;
-	if ((err = unshare_vm(unshare_flags, &new_mm)))
-		goto bad_unshare_cleanup_sigh;
+		goto bad_unshare_out;
 	if ((err = unshare_fd(unshare_flags, &new_fd)))
-		goto bad_unshare_cleanup_vm;
+		goto bad_unshare_cleanup_fs;
 	if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
 					new_fs)))
 		goto bad_unshare_cleanup_fd;
 
-	if (new_fs || new_mm || new_fd || do_sysvsem || new_nsproxy) {
+	if (new_fs || new_fd || do_sysvsem || new_nsproxy) {
 		if (do_sysvsem) {
 			/*
 			 * CLONE_SYSVSEM is equivalent to sys_exit().
@@ -1689,19 +1643,6 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 			spin_unlock(&fs->lock);
 		}
 
-		if (new_mm) {
-			mm = current->mm;
-			active_mm = current->active_mm;
-			current->mm = new_mm;
-			current->active_mm = new_mm;
-			if (current->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
-				atomic_dec(&mm->oom_disable_count);
-				atomic_inc(&new_mm->oom_disable_count);
-			}
-			activate_mm(active_mm, new_mm);
-			new_mm = mm;
-		}
-
 		if (new_fd) {
 			fd = current->files;
 			current->files = new_fd;
@@ -1718,20 +1659,10 @@ bad_unshare_cleanup_fd:
 	if (new_fd)
 		put_files_struct(new_fd);
 
-bad_unshare_cleanup_vm:
-	if (new_mm)
-		mmput(new_mm);
-
-bad_unshare_cleanup_sigh:
-	if (new_sigh)
-		if (atomic_dec_and_test(&new_sigh->count))
-			kmem_cache_free(sighand_cachep, new_sigh);
-
 bad_unshare_cleanup_fs:
 	if (new_fs)
 		free_fs_struct(new_fs);
 
-bad_unshare_cleanup_thread:
 bad_unshare_out:
 	return err;
 }
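
The user-visible contract of unshare(2) is preserved: CLONE_NEWNS still implies CLONE_FS, but the implication now happens inside sys_unshare() instead of by rewriting the caller's flags in check_unshare_flags(), and unsupported combinations fail with a single up-front EINVAL. A minimal userspace sketch (illustrative program, not from this patch):

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>
    #include <errno.h>
    #include <string.h>

    int main(void)
    {
    	/* Private mount namespace; the kernel adds CLONE_FS for us.
    	 * Requires CAP_SYS_ADMIN. */
    	if (unshare(CLONE_NEWNS) == -1) {
    		fprintf(stderr, "unshare: %s\n", strerror(errno));
    		return 1;
    	}
    	printf("now in a private mount namespace\n");
    	return 0;
    }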
diff --git a/kernel/futex.c b/kernel/futex.c
index bda415715382..dfb924ffe65b 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -782,8 +782,8 @@ static void __unqueue_futex(struct futex_q *q)
 {
 	struct futex_hash_bucket *hb;
 
-	if (WARN_ON(!q->lock_ptr || !spin_is_locked(q->lock_ptr)
-			|| plist_node_empty(&q->list)))
+	if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
+	    || WARN_ON(plist_node_empty(&q->list)))
 		return;
 
 	hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
@@ -2418,10 +2418,19 @@ SYSCALL_DEFINE3(get_robust_list, int, pid,
 			goto err_unlock;
 		ret = -EPERM;
 		pcred = __task_cred(p);
+		/* If victim is in different user_ns, then uids are not
+		   comparable, so we must have CAP_SYS_PTRACE */
+		if (cred->user->user_ns != pcred->user->user_ns) {
+			if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
+				goto err_unlock;
+			goto ok;
+		}
+		/* If victim is in same user_ns, then uids are comparable */
 		if (cred->euid != pcred->euid &&
 		    cred->euid != pcred->uid &&
-		    !capable(CAP_SYS_PTRACE))
+		    !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
 			goto err_unlock;
+ok:
 		head = p->robust_list;
 		rcu_read_unlock();
 	}
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index a7934ac75e5b..5f9e689dc8f0 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -153,10 +153,19 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
 			goto err_unlock;
 		ret = -EPERM;
 		pcred = __task_cred(p);
+		/* If victim is in different user_ns, then uids are not
+		   comparable, so we must have CAP_SYS_PTRACE */
+		if (cred->user->user_ns != pcred->user->user_ns) {
+			if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
+				goto err_unlock;
+			goto ok;
+		}
+		/* If victim is in same user_ns, then uids are comparable */
 		if (cred->euid != pcred->euid &&
 		    cred->euid != pcred->uid &&
-		    !capable(CAP_SYS_PTRACE))
+		    !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
 			goto err_unlock;
+ok:
 		head = p->compat_robust_list;
 		rcu_read_unlock();
 	}
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
index 70a298d6da71..b8cadf70b1fb 100644
--- a/kernel/gcov/Kconfig
+++ b/kernel/gcov/Kconfig
@@ -34,7 +34,7 @@ config GCOV_KERNEL
 config GCOV_PROFILE_ALL
 	bool "Profile entire Kernel"
 	depends on GCOV_KERNEL
-	depends on S390 || X86 || (PPC && EXPERIMENTAL) || MICROBLAZE
+	depends on SUPERH || S390 || X86 || (PPC && EXPERIMENTAL) || MICROBLAZE
 	default n
 	---help---
 	This options activates profiling for the entire kernel.
diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile
index 3f761001d517..e97ca59e2520 100644
--- a/kernel/gcov/Makefile
+++ b/kernel/gcov/Makefile
@@ -1,3 +1,3 @@
-EXTRA_CFLAGS := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"'
+ccflags-y := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"'
 
 obj-$(CONFIG_GCOV_KERNEL) := base.o fs.o gcc_3_4.o
diff --git a/kernel/groups.c b/kernel/groups.c
index 253dc0f35cf4..1cc476d52dd3 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -233,7 +233,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
 	struct group_info *group_info;
 	int retval;
 
-	if (!capable(CAP_SETGID))
+	if (!nsown_capable(CAP_SETGID))
 		return -EPERM;
 	if ((unsigned)gidsetsize > NGROUPS_MAX)
 		return -EINVAL;
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index dbccc799407f..6fb014f172f7 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -198,15 +198,6 @@ err:
 	return -ENOMEM;
 }
 
-struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
-{
-	int res = irq_alloc_descs(irq, irq, 1, node);
-
-	if (res == -EEXIST || res == irq)
-		return irq_to_desc(irq);
-	return NULL;
-}
-
 static int irq_expand_nr_irqs(unsigned int nr)
 {
 	if (nr > IRQ_BITMAP_BITS)
@@ -283,11 +274,6 @@ struct irq_desc *irq_to_desc(unsigned int irq)
 	return (irq < NR_IRQS) ? irq_desc + irq : NULL;
 }
 
-struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node)
-{
-	return irq_to_desc(irq);
-}
-
 static void free_desc(unsigned int irq)
 {
 	dynamic_irq_cleanup(irq);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index acd599a43bfb..0a2aa73e536c 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1064,10 +1064,10 @@ mismatch:
 	ret = -EBUSY;
 
 out_mask:
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 	free_cpumask_var(mask);
 
 out_thread:
-	raw_spin_unlock_irqrestore(&desc->lock, flags);
 	if (new->thread) {
 		struct task_struct *t = new->thread;
 
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 4cc2e5ed0bec..760248de109d 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -405,7 +405,8 @@ int show_interrupts(struct seq_file *p, void *v)
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
 	seq_printf(p, " %8s", desc->irq_data.chip->name);
-	seq_printf(p, "-%-8s", desc->name);
+	if (desc->name)
+		seq_printf(p, "-%-8s", desc->name);
 
 	if (action) {
 		seq_printf(p, " %s", action->name);
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 6f6d091b5757..079f1d39a8b8 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c | |||
@@ -64,14 +64,14 @@ static inline int is_kernel_text(unsigned long addr) | |||
64 | if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) || | 64 | if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) || |
65 | arch_is_kernel_text(addr)) | 65 | arch_is_kernel_text(addr)) |
66 | return 1; | 66 | return 1; |
67 | return in_gate_area_no_task(addr); | 67 | return in_gate_area_no_mm(addr); |
68 | } | 68 | } |
69 | 69 | ||
70 | static inline int is_kernel(unsigned long addr) | 70 | static inline int is_kernel(unsigned long addr) |
71 | { | 71 | { |
72 | if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end) | 72 | if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end) |
73 | return 1; | 73 | return 1; |
74 | return in_gate_area_no_task(addr); | 74 | return in_gate_area_no_mm(addr); |
75 | } | 75 | } |
76 | 76 | ||
77 | static int is_ksym_addr(unsigned long addr) | 77 | static int is_ksym_addr(unsigned long addr) |
@@ -342,13 +342,15 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size, | |||
342 | } | 342 | } |
343 | 343 | ||
344 | /* Look up a kernel symbol and return it in a text buffer. */ | 344 | /* Look up a kernel symbol and return it in a text buffer. */ |
345 | int sprint_symbol(char *buffer, unsigned long address) | 345 | static int __sprint_symbol(char *buffer, unsigned long address, |
346 | int symbol_offset) | ||
346 | { | 347 | { |
347 | char *modname; | 348 | char *modname; |
348 | const char *name; | 349 | const char *name; |
349 | unsigned long offset, size; | 350 | unsigned long offset, size; |
350 | int len; | 351 | int len; |
351 | 352 | ||
353 | address += symbol_offset; | ||
352 | name = kallsyms_lookup(address, &size, &offset, &modname, buffer); | 354 | name = kallsyms_lookup(address, &size, &offset, &modname, buffer); |
353 | if (!name) | 355 | if (!name) |
354 | return sprintf(buffer, "0x%lx", address); | 356 | return sprintf(buffer, "0x%lx", address); |
@@ -357,17 +359,53 @@ int sprint_symbol(char *buffer, unsigned long address) | |||
357 | strcpy(buffer, name); | 359 | strcpy(buffer, name); |
358 | len = strlen(buffer); | 360 | len = strlen(buffer); |
359 | buffer += len; | 361 | buffer += len; |
362 | offset -= symbol_offset; | ||
360 | 363 | ||
361 | if (modname) | 364 | if (modname) |
362 | len += sprintf(buffer, "+%#lx/%#lx [%s]", | 365 | len += sprintf(buffer, "+%#lx/%#lx [%s]", offset, size, modname); |
363 | offset, size, modname); | ||
364 | else | 366 | else |
365 | len += sprintf(buffer, "+%#lx/%#lx", offset, size); | 367 | len += sprintf(buffer, "+%#lx/%#lx", offset, size); |
366 | 368 | ||
367 | return len; | 369 | return len; |
368 | } | 370 | } |
371 | |||
372 | /** | ||
373 | * sprint_symbol - Look up a kernel symbol and return it in a text buffer | ||
374 | * @buffer: buffer to be stored | ||
375 | * @address: address to lookup | ||
376 | * | ||
377 | * This function looks up a kernel symbol with @address and stores its name, | ||
378 | * offset, size and module name to @buffer if possible. If no symbol was found, | ||
379 | * just saves its @address as is. | ||
380 | * | ||
381 | * This function returns the number of bytes stored in @buffer. | ||
382 | */ | ||
383 | int sprint_symbol(char *buffer, unsigned long address) | ||
384 | { | ||
385 | return __sprint_symbol(buffer, address, 0); | ||
386 | } | ||
387 | |||
369 | EXPORT_SYMBOL_GPL(sprint_symbol); | 388 | EXPORT_SYMBOL_GPL(sprint_symbol); |
370 | 389 | ||
390 | /** | ||
391 | * sprint_backtrace - Look up a backtrace symbol and return it in a text buffer | ||
392 | * @buffer: buffer to be stored | ||
393 | * @address: address to lookup | ||
394 | * | ||
395 | * This function is for stack backtrace and does the same thing as | ||
396 | * sprint_symbol() but with modified/decreased @address. If there is a | ||
397 | * tail-call to the function marked "noreturn", gcc optimized out code after | ||
398 | * the call so that the stack-saved return address could point outside of the | ||
399 | * caller. This function ensures that kallsyms will find the original caller | ||
400 | * by decreasing @address. | ||
401 | * | ||
402 | * This function returns the number of bytes stored in @buffer. | ||
403 | */ | ||
404 | int sprint_backtrace(char *buffer, unsigned long address) | ||
405 | { | ||
406 | return __sprint_symbol(buffer, address, -1); | ||
407 | } | ||
408 | |||
371 | /* Look up a kernel symbol and print it to the kernel messages. */ | 409 | /* Look up a kernel symbol and print it to the kernel messages. */ |
372 | void __print_symbol(const char *fmt, unsigned long address) | 410 | void __print_symbol(const char *fmt, unsigned long address) |
373 | { | 411 | { |
@@ -477,13 +515,11 @@ static int s_show(struct seq_file *m, void *p) | |||
477 | */ | 515 | */ |
478 | type = iter->exported ? toupper(iter->type) : | 516 | type = iter->exported ? toupper(iter->type) : |
479 | tolower(iter->type); | 517 | tolower(iter->type); |
480 | seq_printf(m, "%0*lx %c %s\t[%s]\n", | 518 | seq_printf(m, "%pK %c %s\t[%s]\n", (void *)iter->value, |
481 | (int)(2 * sizeof(void *)), | 519 | type, iter->name, iter->module_name); |
482 | iter->value, type, iter->name, iter->module_name); | ||
483 | } else | 520 | } else |
484 | seq_printf(m, "%0*lx %c %s\n", | 521 | seq_printf(m, "%pK %c %s\n", (void *)iter->value, |
485 | (int)(2 * sizeof(void *)), | 522 | iter->type, iter->name); |
486 | iter->value, iter->type, iter->name); | ||
487 | return 0; | 523 | return 0; |
488 | } | 524 | } |
489 | 525 | ||
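
The sprint_backtrace() helper above exists for stack dumpers: a saved return address points one instruction past the call, so looking up address - 1 keeps a tail-call into a noreturn function attributed to the real caller. A minimal sketch of a caller (hypothetical function; buffer sized per KSYM_SYMBOL_LEN from <linux/kallsyms.h>):

/* Hypothetical backtrace printer using the new helper. */
static void print_return_address(unsigned long ret_addr)
{
	char sym[KSYM_SYMBOL_LEN];

	/* sprint_backtrace() internally looks up ret_addr - 1, then
	 * reports name and offset relative to ret_addr again. */
	sprint_backtrace(sym, ret_addr);
	printk(KERN_DEBUG " [<%016lx>] %s\n", ret_addr, sym);
}
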
diff --git a/kernel/kthread.c b/kernel/kthread.c index c55afba990a3..684ab3f7dd72 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c | |||
@@ -27,6 +27,7 @@ struct kthread_create_info | |||
27 | /* Information passed to kthread() from kthreadd. */ | 27 | /* Information passed to kthread() from kthreadd. */ |
28 | int (*threadfn)(void *data); | 28 | int (*threadfn)(void *data); |
29 | void *data; | 29 | void *data; |
30 | int node; | ||
30 | 31 | ||
31 | /* Result passed back to kthread_create() from kthreadd. */ | 32 | /* Result passed back to kthread_create() from kthreadd. */ |
32 | struct task_struct *result; | 33 | struct task_struct *result; |
@@ -98,10 +99,23 @@ static int kthread(void *_create) | |||
98 | do_exit(ret); | 99 | do_exit(ret); |
99 | } | 100 | } |
100 | 101 | ||
102 | /* called from do_fork() to get node information for the task being created */ | ||
103 | int tsk_fork_get_node(struct task_struct *tsk) | ||
104 | { | ||
105 | #ifdef CONFIG_NUMA | ||
106 | if (tsk == kthreadd_task) | ||
107 | return tsk->pref_node_fork; | ||
108 | #endif | ||
109 | return numa_node_id(); | ||
110 | } | ||
111 | |||
101 | static void create_kthread(struct kthread_create_info *create) | 112 | static void create_kthread(struct kthread_create_info *create) |
102 | { | 113 | { |
103 | int pid; | 114 | int pid; |
104 | 115 | ||
116 | #ifdef CONFIG_NUMA | ||
117 | current->pref_node_fork = create->node; | ||
118 | #endif | ||
105 | /* We want our own signal handler (we take no signals by default). */ | 119 | /* We want our own signal handler (we take no signals by default). */ |
106 | pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); | 120 | pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); |
107 | if (pid < 0) { | 121 | if (pid < 0) { |
@@ -111,15 +125,18 @@ static void create_kthread(struct kthread_create_info *create) | |||
111 | } | 125 | } |
112 | 126 | ||
113 | /** | 127 | /** |
114 | * kthread_create - create a kthread. | 128 | * kthread_create_on_node - create a kthread. |
115 | * @threadfn: the function to run until signal_pending(current). | 129 | * @threadfn: the function to run until signal_pending(current). |
116 | * @data: data ptr for @threadfn. | 130 | * @data: data ptr for @threadfn. |
131 | * @node: memory node number. | ||
117 | * @namefmt: printf-style name for the thread. | 132 | * @namefmt: printf-style name for the thread. |
118 | * | 133 | * |
119 | * Description: This helper function creates and names a kernel | 134 | * Description: This helper function creates and names a kernel |
120 | * thread. The thread will be stopped: use wake_up_process() to start | 135 | * thread. The thread will be stopped: use wake_up_process() to start |
121 | * it. See also kthread_run(). | 136 | * it. See also kthread_run(). |
122 | * | 137 | * |
138 | * If the thread is going to be bound to a particular cpu, give its node | ||
139 | * in @node, to get NUMA affinity for the kthread stack; otherwise give -1. | ||
123 | * When woken, the thread will run @threadfn() with @data as its | 140 | * When woken, the thread will run @threadfn() with @data as its |
124 | * argument. @threadfn() can either call do_exit() directly if it is a | 141 | * argument. @threadfn() can either call do_exit() directly if it is a |
125 | * standalone thread for which no one will call kthread_stop(), or | 142 | * standalone thread for which no one will call kthread_stop(), or |
@@ -129,15 +146,17 @@ static void create_kthread(struct kthread_create_info *create) | |||
129 | * | 146 | * |
130 | * Returns a task_struct or ERR_PTR(-ENOMEM). | 147 | * Returns a task_struct or ERR_PTR(-ENOMEM). |
131 | */ | 148 | */ |
132 | struct task_struct *kthread_create(int (*threadfn)(void *data), | 149 | struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), |
133 | void *data, | 150 | void *data, |
134 | const char namefmt[], | 151 | int node, |
135 | ...) | 152 | const char namefmt[], |
153 | ...) | ||
136 | { | 154 | { |
137 | struct kthread_create_info create; | 155 | struct kthread_create_info create; |
138 | 156 | ||
139 | create.threadfn = threadfn; | 157 | create.threadfn = threadfn; |
140 | create.data = data; | 158 | create.data = data; |
159 | create.node = node; | ||
141 | init_completion(&create.done); | 160 | init_completion(&create.done); |
142 | 161 | ||
143 | spin_lock(&kthread_create_lock); | 162 | spin_lock(&kthread_create_lock); |
@@ -164,7 +183,7 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), | |||
164 | } | 183 | } |
165 | return create.result; | 184 | return create.result; |
166 | } | 185 | } |
167 | EXPORT_SYMBOL(kthread_create); | 186 | EXPORT_SYMBOL(kthread_create_on_node); |
168 | 187 | ||
169 | /** | 188 | /** |
170 | * kthread_bind - bind a just-created kthread to a cpu. | 189 | * kthread_bind - bind a just-created kthread to a cpu. |
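
Renaming the export to kthread_create_on_node() would break every existing kthread_create() caller unless the old name survives; presumably the companion header change keeps it as a thin wrapper passing -1 (no NUMA preference), along these lines:

/* Assumed include/linux/kthread.h counterpart; not part of this hunk. */
#define kthread_create(threadfn, data, namefmt, arg...) \
	kthread_create_on_node(threadfn, data, -1, namefmt, ##arg)
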
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index 1969d2fc4b36..71edd2f60c02 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c | |||
@@ -225,7 +225,7 @@ static int lockdep_stats_show(struct seq_file *m, void *v) | |||
225 | nr_irq_read_safe = 0, nr_irq_read_unsafe = 0, | 225 | nr_irq_read_safe = 0, nr_irq_read_unsafe = 0, |
226 | nr_softirq_read_safe = 0, nr_softirq_read_unsafe = 0, | 226 | nr_softirq_read_safe = 0, nr_softirq_read_unsafe = 0, |
227 | nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0, | 227 | nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0, |
228 | sum_forward_deps = 0, factor = 0; | 228 | sum_forward_deps = 0; |
229 | 229 | ||
230 | list_for_each_entry(class, &all_lock_classes, lock_entry) { | 230 | list_for_each_entry(class, &all_lock_classes, lock_entry) { |
231 | 231 | ||
@@ -283,13 +283,6 @@ static int lockdep_stats_show(struct seq_file *m, void *v) | |||
283 | nr_hardirq_unsafe * nr_hardirq_safe + | 283 | nr_hardirq_unsafe * nr_hardirq_safe + |
284 | nr_list_entries); | 284 | nr_list_entries); |
285 | 285 | ||
286 | /* | ||
287 | * Estimated factor between direct and indirect | ||
288 | * dependencies: | ||
289 | */ | ||
290 | if (nr_list_entries) | ||
291 | factor = sum_forward_deps / nr_list_entries; | ||
292 | |||
293 | #ifdef CONFIG_PROVE_LOCKING | 286 | #ifdef CONFIG_PROVE_LOCKING |
294 | seq_printf(m, " dependency chains: %11lu [max: %lu]\n", | 287 | seq_printf(m, " dependency chains: %11lu [max: %lu]\n", |
295 | nr_lock_chains, MAX_LOCKDEP_CHAINS); | 288 | nr_lock_chains, MAX_LOCKDEP_CHAINS); |
diff --git a/kernel/module.c b/kernel/module.c index efa290ea94bf..1f9f7bc56ca1 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -1168,7 +1168,7 @@ static ssize_t module_sect_show(struct module_attribute *mattr, | |||
1168 | { | 1168 | { |
1169 | struct module_sect_attr *sattr = | 1169 | struct module_sect_attr *sattr = |
1170 | container_of(mattr, struct module_sect_attr, mattr); | 1170 | container_of(mattr, struct module_sect_attr, mattr); |
1171 | return sprintf(buf, "0x%lx\n", sattr->address); | 1171 | return sprintf(buf, "0x%pK\n", (void *)sattr->address); |
1172 | } | 1172 | } |
1173 | 1173 | ||
1174 | static void free_sect_attrs(struct module_sect_attrs *sect_attrs) | 1174 | static void free_sect_attrs(struct module_sect_attrs *sect_attrs) |
@@ -3224,7 +3224,7 @@ static int m_show(struct seq_file *m, void *p) | |||
3224 | mod->state == MODULE_STATE_COMING ? "Loading": | 3224 | mod->state == MODULE_STATE_COMING ? "Loading": |
3225 | "Live"); | 3225 | "Live"); |
3226 | /* Used by oprofile and other similar tools. */ | 3226 | /* Used by oprofile and other similar tools. */ |
3227 | seq_printf(m, " 0x%p", mod->module_core); | 3227 | seq_printf(m, " 0x%pK", mod->module_core); |
3228 | 3228 | ||
3229 | /* Taints info */ | 3229 | /* Taints info */ |
3230 | if (mod->taints) | 3230 | if (mod->taints) |
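
Both module.c hunks switch address printing to %pK, the printk extension that censors kernel pointers for unprivileged readers. A sketch of the behaviour being relied on (sysctl semantics as assumed for this series):

/* Illustrative: %pK consults the kptr_restrict sysctl at print time.
 *   kptr_restrict = 0 -> prints like %p
 *   kptr_restrict = 1 -> prints all zeroes unless the reader holds
 *                        the required capability
 * so "cat /proc/modules" as an ordinary user shows a zeroed address
 * for mod->module_core while a privileged reader still sees it. */
seq_printf(m, " 0x%pK", mod->module_core);
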
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index f74e6c00e26d..a05d191ffdd9 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c | |||
@@ -69,13 +69,13 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, | |||
69 | goto out_ns; | 69 | goto out_ns; |
70 | } | 70 | } |
71 | 71 | ||
72 | new_nsp->uts_ns = copy_utsname(flags, tsk->nsproxy->uts_ns); | 72 | new_nsp->uts_ns = copy_utsname(flags, tsk); |
73 | if (IS_ERR(new_nsp->uts_ns)) { | 73 | if (IS_ERR(new_nsp->uts_ns)) { |
74 | err = PTR_ERR(new_nsp->uts_ns); | 74 | err = PTR_ERR(new_nsp->uts_ns); |
75 | goto out_uts; | 75 | goto out_uts; |
76 | } | 76 | } |
77 | 77 | ||
78 | new_nsp->ipc_ns = copy_ipcs(flags, tsk->nsproxy->ipc_ns); | 78 | new_nsp->ipc_ns = copy_ipcs(flags, tsk); |
79 | if (IS_ERR(new_nsp->ipc_ns)) { | 79 | if (IS_ERR(new_nsp->ipc_ns)) { |
80 | err = PTR_ERR(new_nsp->ipc_ns); | 80 | err = PTR_ERR(new_nsp->ipc_ns); |
81 | goto out_ipc; | 81 | goto out_ipc; |
diff --git a/kernel/panic.c b/kernel/panic.c index 991bb87a1704..69231670eb95 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
@@ -433,3 +433,13 @@ EXPORT_SYMBOL(__stack_chk_fail); | |||
433 | 433 | ||
434 | core_param(panic, panic_timeout, int, 0644); | 434 | core_param(panic, panic_timeout, int, 0644); |
435 | core_param(pause_on_oops, pause_on_oops, int, 0644); | 435 | core_param(pause_on_oops, pause_on_oops, int, 0644); |
436 | |||
437 | static int __init oops_setup(char *s) | ||
438 | { | ||
439 | if (!s) | ||
440 | return -EINVAL; | ||
441 | if (!strcmp(s, "panic")) | ||
442 | panic_on_oops = 1; | ||
443 | return 0; | ||
444 | } | ||
445 | early_param("oops", oops_setup); | ||
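
oops_setup() gives panic_on_oops a boot-time switch to complement the existing kernel.panic_on_oops sysctl. Combined with the panic_timeout core_param registered a few lines up, a box can be made to reboot shortly after any oops with a command line like (illustrative):

	oops=panic panic=30
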
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index ed253aa24ba4..c75925c4d1e2 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
@@ -145,7 +145,8 @@ static struct srcu_struct pmus_srcu; | |||
145 | */ | 145 | */ |
146 | int sysctl_perf_event_paranoid __read_mostly = 1; | 146 | int sysctl_perf_event_paranoid __read_mostly = 1; |
147 | 147 | ||
148 | int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */ | 148 | /* Minimum for 128 pages + 1 for the user control page */ |
149 | int sysctl_perf_event_mlock __read_mostly = 516; /* 'free' kb per user */ | ||
149 | 150 | ||
150 | /* | 151 | /* |
151 | * max perf event sample rate | 152 | * max perf event sample rate |
@@ -941,6 +942,7 @@ static void perf_group_attach(struct perf_event *event) | |||
941 | static void | 942 | static void |
942 | list_del_event(struct perf_event *event, struct perf_event_context *ctx) | 943 | list_del_event(struct perf_event *event, struct perf_event_context *ctx) |
943 | { | 944 | { |
945 | struct perf_cpu_context *cpuctx; | ||
944 | /* | 946 | /* |
945 | * We can have double detach due to exit/hot-unplug + close. | 947 | * We can have double detach due to exit/hot-unplug + close. |
946 | */ | 948 | */ |
@@ -949,8 +951,17 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) | |||
949 | 951 | ||
950 | event->attach_state &= ~PERF_ATTACH_CONTEXT; | 952 | event->attach_state &= ~PERF_ATTACH_CONTEXT; |
951 | 953 | ||
952 | if (is_cgroup_event(event)) | 954 | if (is_cgroup_event(event)) { |
953 | ctx->nr_cgroups--; | 955 | ctx->nr_cgroups--; |
956 | cpuctx = __get_cpu_context(ctx); | ||
957 | /* | ||
958 | * if there are no more cgroup events | ||
959 | * then clear cgrp to avoid stale pointer | ||
960 | * in update_cgrp_time_from_cpuctx() | ||
961 | */ | ||
962 | if (!ctx->nr_cgroups) | ||
963 | cpuctx->cgrp = NULL; | ||
964 | } | ||
954 | 965 | ||
955 | ctx->nr_events--; | 966 | ctx->nr_events--; |
956 | if (event->attr.inherit_stat) | 967 | if (event->attr.inherit_stat) |
@@ -5122,7 +5133,7 @@ static int perf_exclude_event(struct perf_event *event, | |||
5122 | struct pt_regs *regs) | 5133 | struct pt_regs *regs) |
5123 | { | 5134 | { |
5124 | if (event->hw.state & PERF_HES_STOPPED) | 5135 | if (event->hw.state & PERF_HES_STOPPED) |
5125 | return 0; | 5136 | return 1; |
5126 | 5137 | ||
5127 | if (regs) { | 5138 | if (regs) { |
5128 | if (event->attr.exclude_user && user_mode(regs)) | 5139 | if (event->attr.exclude_user && user_mode(regs)) |
@@ -5478,6 +5489,8 @@ static int perf_tp_event_match(struct perf_event *event, | |||
5478 | struct perf_sample_data *data, | 5489 | struct perf_sample_data *data, |
5479 | struct pt_regs *regs) | 5490 | struct pt_regs *regs) |
5480 | { | 5491 | { |
5492 | if (event->hw.state & PERF_HES_STOPPED) | ||
5493 | return 0; | ||
5481 | /* | 5494 | /* |
5482 | * All tracepoints are from kernel-space. | 5495 | * All tracepoints are from kernel-space. |
5483 | */ | 5496 | */ |
@@ -6720,17 +6733,20 @@ __perf_event_exit_task(struct perf_event *child_event, | |||
6720 | struct perf_event_context *child_ctx, | 6733 | struct perf_event_context *child_ctx, |
6721 | struct task_struct *child) | 6734 | struct task_struct *child) |
6722 | { | 6735 | { |
6723 | struct perf_event *parent_event; | 6736 | if (child_event->parent) { |
6737 | raw_spin_lock_irq(&child_ctx->lock); | ||
6738 | perf_group_detach(child_event); | ||
6739 | raw_spin_unlock_irq(&child_ctx->lock); | ||
6740 | } | ||
6724 | 6741 | ||
6725 | perf_remove_from_context(child_event); | 6742 | perf_remove_from_context(child_event); |
6726 | 6743 | ||
6727 | parent_event = child_event->parent; | ||
6728 | /* | 6744 | /* |
6729 | * It can happen that parent exits first, and has events | 6745 | * It can happen that the parent exits first, and has events |
6730 | * that are still around due to the child reference. These | 6746 | * that are still around due to the child reference. These |
6731 | * events need to be zapped - but otherwise linger. | 6747 | * events need to be zapped. |
6732 | */ | 6748 | */ |
6733 | if (parent_event) { | 6749 | if (child_event->parent) { |
6734 | sync_child_event(child_event, child); | 6750 | sync_child_event(child_event, child); |
6735 | free_event(child_event); | 6751 | free_event(child_event); |
6736 | } | 6752 | } |
diff --git a/kernel/pid.c b/kernel/pid.c index 39b65b69584f..02f221274265 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -435,6 +435,7 @@ struct pid *get_task_pid(struct task_struct *task, enum pid_type type) | |||
435 | rcu_read_unlock(); | 435 | rcu_read_unlock(); |
436 | return pid; | 436 | return pid; |
437 | } | 437 | } |
438 | EXPORT_SYMBOL_GPL(get_task_pid); | ||
438 | 439 | ||
439 | struct task_struct *get_pid_task(struct pid *pid, enum pid_type type) | 440 | struct task_struct *get_pid_task(struct pid *pid, enum pid_type type) |
440 | { | 441 | { |
@@ -446,6 +447,7 @@ struct task_struct *get_pid_task(struct pid *pid, enum pid_type type) | |||
446 | rcu_read_unlock(); | 447 | rcu_read_unlock(); |
447 | return result; | 448 | return result; |
448 | } | 449 | } |
450 | EXPORT_SYMBOL_GPL(get_pid_task); | ||
449 | 451 | ||
450 | struct pid *find_get_pid(pid_t nr) | 452 | struct pid *find_get_pid(pid_t nr) |
451 | { | 453 | { |
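
Exporting get_task_pid() and get_pid_task() lets modules hold refcounted struct pid handles instead of raw task pointers that may go stale. The usual get/put pairing would look like this (hypothetical module snippet):

/* Hypothetical module code: pin a pid, resolve it later, drop both refs. */
static void pid_pin_example(void)
{
	struct pid *pid = get_task_pid(current, PIDTYPE_PID);
	struct task_struct *tsk = get_pid_task(pid, PIDTYPE_PID);

	if (tsk) {
		/* ... use tsk; it cannot be freed while we hold the ref ... */
		put_task_struct(tsk);
	}
	put_pid(pid);
}
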
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index a5aff94e1f0b..e9c9adc84ca6 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/err.h> | 14 | #include <linux/err.h> |
15 | #include <linux/acct.h> | 15 | #include <linux/acct.h> |
16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
17 | #include <linux/proc_fs.h> | ||
17 | 18 | ||
18 | #define BITS_PER_PAGE (PAGE_SIZE*8) | 19 | #define BITS_PER_PAGE (PAGE_SIZE*8) |
19 | 20 | ||
@@ -72,7 +73,7 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p | |||
72 | { | 73 | { |
73 | struct pid_namespace *ns; | 74 | struct pid_namespace *ns; |
74 | unsigned int level = parent_pid_ns->level + 1; | 75 | unsigned int level = parent_pid_ns->level + 1; |
75 | int i; | 76 | int i, err = -ENOMEM; |
76 | 77 | ||
77 | ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL); | 78 | ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL); |
78 | if (ns == NULL) | 79 | if (ns == NULL) |
@@ -96,14 +97,20 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p | |||
96 | for (i = 1; i < PIDMAP_ENTRIES; i++) | 97 | for (i = 1; i < PIDMAP_ENTRIES; i++) |
97 | atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); | 98 | atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); |
98 | 99 | ||
100 | err = pid_ns_prepare_proc(ns); | ||
101 | if (err) | ||
102 | goto out_put_parent_pid_ns; | ||
103 | |||
99 | return ns; | 104 | return ns; |
100 | 105 | ||
106 | out_put_parent_pid_ns: | ||
107 | put_pid_ns(parent_pid_ns); | ||
101 | out_free_map: | 108 | out_free_map: |
102 | kfree(ns->pidmap[0].page); | 109 | kfree(ns->pidmap[0].page); |
103 | out_free: | 110 | out_free: |
104 | kmem_cache_free(pid_ns_cachep, ns); | 111 | kmem_cache_free(pid_ns_cachep, ns); |
105 | out: | 112 | out: |
106 | return ERR_PTR(-ENOMEM); | 113 | return ERR_PTR(err); |
107 | } | 114 | } |
108 | 115 | ||
109 | static void destroy_pid_namespace(struct pid_namespace *ns) | 116 | static void destroy_pid_namespace(struct pid_namespace *ns) |
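
The reworked error path keeps the usual kernel unwind idiom: err starts at -ENOMEM, each later failure overwrites it, and the labels undo prior steps in reverse order by falling through. In miniature (struct thing and all helpers are hypothetical):

/* Sketch of the unwind shape used above. */
static struct thing *create_thing(void)
{
	struct thing *t;
	int err = -ENOMEM;

	t = kzalloc(sizeof(*t), GFP_KERNEL);
	if (!t)
		goto out;
	err = grab_parent_ref(t);	/* mirrors get_pid_ns()          */
	if (err)
		goto out_free;
	err = prepare_proc(t);		/* mirrors pid_ns_prepare_proc() */
	if (err)
		goto out_put_parent;
	return t;

out_put_parent:
	put_parent_ref(t);		/* falls through to free t       */
out_free:
	kfree(t);
out:
	return ERR_PTR(err);
}
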
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index aeaa7f846821..0da058bff8eb 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c | |||
@@ -103,11 +103,14 @@ static struct pm_qos_object *pm_qos_array[] = { | |||
103 | 103 | ||
104 | static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, | 104 | static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, |
105 | size_t count, loff_t *f_pos); | 105 | size_t count, loff_t *f_pos); |
106 | static ssize_t pm_qos_power_read(struct file *filp, char __user *buf, | ||
107 | size_t count, loff_t *f_pos); | ||
106 | static int pm_qos_power_open(struct inode *inode, struct file *filp); | 108 | static int pm_qos_power_open(struct inode *inode, struct file *filp); |
107 | static int pm_qos_power_release(struct inode *inode, struct file *filp); | 109 | static int pm_qos_power_release(struct inode *inode, struct file *filp); |
108 | 110 | ||
109 | static const struct file_operations pm_qos_power_fops = { | 111 | static const struct file_operations pm_qos_power_fops = { |
110 | .write = pm_qos_power_write, | 112 | .write = pm_qos_power_write, |
113 | .read = pm_qos_power_read, | ||
111 | .open = pm_qos_power_open, | 114 | .open = pm_qos_power_open, |
112 | .release = pm_qos_power_release, | 115 | .release = pm_qos_power_release, |
113 | .llseek = noop_llseek, | 116 | .llseek = noop_llseek, |
@@ -376,6 +379,27 @@ static int pm_qos_power_release(struct inode *inode, struct file *filp) | |||
376 | } | 379 | } |
377 | 380 | ||
378 | 381 | ||
382 | static ssize_t pm_qos_power_read(struct file *filp, char __user *buf, | ||
383 | size_t count, loff_t *f_pos) | ||
384 | { | ||
385 | s32 value; | ||
386 | unsigned long flags; | ||
387 | struct pm_qos_object *o; | ||
388 | struct pm_qos_request_list *pm_qos_req = filp->private_data; | ||
389 | |||
390 | if (!pm_qos_req) | ||
391 | return -EINVAL; | ||
392 | if (!pm_qos_request_active(pm_qos_req)) | ||
393 | return -EINVAL; | ||
394 | |||
395 | o = pm_qos_array[pm_qos_req->pm_qos_class]; | ||
396 | spin_lock_irqsave(&pm_qos_lock, flags); | ||
397 | value = pm_qos_get_value(o); | ||
398 | spin_unlock_irqrestore(&pm_qos_lock, flags); | ||
399 | |||
400 | return simple_read_from_buffer(buf, count, f_pos, &value, sizeof(s32)); | ||
401 | } | ||
402 | |||
379 | static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, | 403 | static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, |
380 | size_t count, loff_t *f_pos) | 404 | size_t count, loff_t *f_pos) |
381 | { | 405 | { |
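
With .read wired up, a process that opened one of the pm_qos device nodes (which registers a request) can read back the aggregate target for that class as a raw 32-bit value. A hedged userspace sketch, using the conventional CPU DMA latency node:

/* Userspace sketch: the value read is the current aggregate target
 * for the class, not this process's own request. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int32_t target;
	int fd = open("/dev/cpu_dma_latency", O_RDWR);

	if (fd < 0)
		return 1;
	if (read(fd, &target, sizeof(target)) == (ssize_t)sizeof(target))
		printf("current target: %d\n", (int)target);
	close(fd);	/* closing the fd drops the request again */
	return 0;
}
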
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 265729966ece..4603f08dc47b 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig | |||
@@ -1,125 +1,12 @@ | |||
1 | config PM | ||
2 | bool "Power Management support" | ||
3 | depends on !IA64_HP_SIM | ||
4 | ---help--- | ||
5 | "Power Management" means that parts of your computer are shut | ||
6 | off or put into a power conserving "sleep" mode if they are not | ||
7 | being used. There are two competing standards for doing this: APM | ||
8 | and ACPI. If you want to use either one, say Y here and then also | ||
9 | to the requisite support below. | ||
10 | |||
11 | Power Management is most important for battery powered laptop | ||
12 | computers; if you have a laptop, check out the Linux Laptop home | ||
13 | page on the WWW at <http://www.linux-on-laptops.com/> or | ||
14 | Tuxmobil - Linux on Mobile Computers at <http://www.tuxmobil.org/> | ||
15 | and the Battery Powered Linux mini-HOWTO, available from | ||
16 | <http://www.tldp.org/docs.html#howto>. | ||
17 | |||
18 | Note that, even if you say N here, Linux on the x86 architecture | ||
19 | will issue the hlt instruction if nothing is to be done, thereby | ||
20 | sending the processor to sleep and saving power. | ||
21 | |||
22 | config PM_DEBUG | ||
23 | bool "Power Management Debug Support" | ||
24 | depends on PM | ||
25 | ---help--- | ||
26 | This option enables various debugging support in the Power Management | ||
27 | code. This is helpful when debugging and reporting PM bugs, like | ||
28 | suspend support. | ||
29 | |||
30 | config PM_ADVANCED_DEBUG | ||
31 | bool "Extra PM attributes in sysfs for low-level debugging/testing" | ||
32 | depends on PM_DEBUG | ||
33 | default n | ||
34 | ---help--- | ||
35 | Add extra sysfs attributes allowing one to access some Power Management | ||
36 | fields of device objects from user space. If you are not a kernel | ||
37 | developer interested in debugging/testing Power Management, say "no". | ||
38 | |||
39 | config PM_VERBOSE | ||
40 | bool "Verbose Power Management debugging" | ||
41 | depends on PM_DEBUG | ||
42 | default n | ||
43 | ---help--- | ||
44 | This option enables verbose messages from the Power Management code. | ||
45 | |||
46 | config CAN_PM_TRACE | ||
47 | def_bool y | ||
48 | depends on PM_DEBUG && PM_SLEEP && EXPERIMENTAL | ||
49 | |||
50 | config PM_TRACE | ||
51 | bool | ||
52 | help | ||
53 | This enables code to save the last PM event point across | ||
54 | reboot. The architecture needs to support this, x86 for | ||
55 | example does by saving things in the RTC, see below. | ||
56 | |||
57 | The architecture specific code must provide the extern | ||
58 | functions from <linux/resume-trace.h> as well as the | ||
59 | <asm/resume-trace.h> header with a TRACE_RESUME() macro. | ||
60 | |||
61 | The way the information is presented is architecture- | ||
62 | dependent, x86 will print the information during a | ||
63 | late_initcall. | ||
64 | |||
65 | config PM_TRACE_RTC | ||
66 | bool "Suspend/resume event tracing" | ||
67 | depends on CAN_PM_TRACE | ||
68 | depends on X86 | ||
69 | select PM_TRACE | ||
70 | default n | ||
71 | ---help--- | ||
72 | This enables some cheesy code to save the last PM event point in the | ||
73 | RTC across reboots, so that you can debug a machine that just hangs | ||
74 | during suspend (or more commonly, during resume). | ||
75 | |||
76 | To use this debugging feature you should attempt to suspend the | ||
77 | machine, reboot it and then run | ||
78 | |||
79 | dmesg -s 1000000 | grep 'hash matches' | ||
80 | |||
81 | CAUTION: this option will cause your machine's real-time clock to be | ||
82 | set to an invalid time after a resume. | ||
83 | |||
84 | config PM_SLEEP_SMP | ||
85 | bool | ||
86 | depends on SMP | ||
87 | depends on ARCH_SUSPEND_POSSIBLE || ARCH_HIBERNATION_POSSIBLE | ||
88 | depends on PM_SLEEP | ||
89 | select HOTPLUG | ||
90 | select HOTPLUG_CPU | ||
91 | default y | ||
92 | |||
93 | config PM_SLEEP | ||
94 | bool | ||
95 | depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE | ||
96 | default y | ||
97 | |||
98 | config PM_SLEEP_ADVANCED_DEBUG | ||
99 | bool | ||
100 | depends on PM_ADVANCED_DEBUG | ||
101 | default n | ||
102 | |||
103 | config SUSPEND | 1 | config SUSPEND |
104 | bool "Suspend to RAM and standby" | 2 | bool "Suspend to RAM and standby" |
105 | depends on PM && ARCH_SUSPEND_POSSIBLE | 3 | depends on ARCH_SUSPEND_POSSIBLE |
106 | default y | 4 | default y |
107 | ---help--- | 5 | ---help--- |
108 | Allow the system to enter sleep states in which main memory is | 6 | Allow the system to enter sleep states in which main memory is |
109 | powered and thus its contents are preserved, such as the | 7 | powered and thus its contents are preserved, such as the |
110 | suspend-to-RAM state (e.g. the ACPI S3 state). | 8 | suspend-to-RAM state (e.g. the ACPI S3 state). |
111 | 9 | ||
112 | config PM_TEST_SUSPEND | ||
113 | bool "Test suspend/resume and wakealarm during bootup" | ||
114 | depends on SUSPEND && PM_DEBUG && RTC_CLASS=y | ||
115 | ---help--- | ||
116 | This option will let you suspend your machine during bootup, and | ||
117 | make it wake up a few seconds later using an RTC wakeup alarm. | ||
118 | Enable this with a kernel parameter like "test_suspend=mem". | ||
119 | |||
120 | You probably want to have your system's RTC driver statically | ||
121 | linked, ensuring that it's available when this test runs. | ||
122 | |||
123 | config SUSPEND_FREEZER | 10 | config SUSPEND_FREEZER |
124 | bool "Enable freezer for suspend to RAM/standby" \ | 11 | bool "Enable freezer for suspend to RAM/standby" \ |
125 | if ARCH_WANTS_FREEZER_CONTROL || BROKEN | 12 | if ARCH_WANTS_FREEZER_CONTROL || BROKEN |
@@ -133,7 +20,7 @@ config SUSPEND_FREEZER | |||
133 | 20 | ||
134 | config HIBERNATION | 21 | config HIBERNATION |
135 | bool "Hibernation (aka 'suspend to disk')" | 22 | bool "Hibernation (aka 'suspend to disk')" |
136 | depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE | 23 | depends on SWAP && ARCH_HIBERNATION_POSSIBLE |
137 | select LZO_COMPRESS | 24 | select LZO_COMPRESS |
138 | select LZO_DECOMPRESS | 25 | select LZO_DECOMPRESS |
139 | ---help--- | 26 | ---help--- |
@@ -196,6 +83,106 @@ config PM_STD_PARTITION | |||
196 | suspended image to. It will simply pick the first available swap | 83 | suspended image to. It will simply pick the first available swap |
197 | device. | 84 | device. |
198 | 85 | ||
86 | config PM_SLEEP | ||
87 | def_bool y | ||
88 | depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE | ||
89 | |||
90 | config PM_SLEEP_SMP | ||
91 | def_bool y | ||
92 | depends on SMP | ||
93 | depends on ARCH_SUSPEND_POSSIBLE || ARCH_HIBERNATION_POSSIBLE | ||
94 | depends on PM_SLEEP | ||
95 | select HOTPLUG | ||
96 | select HOTPLUG_CPU | ||
97 | |||
98 | config PM_RUNTIME | ||
99 | bool "Run-time PM core functionality" | ||
100 | depends on !IA64_HP_SIM | ||
101 | ---help--- | ||
102 | Enable functionality allowing I/O devices to be put into energy-saving | ||
103 | (low power) states at run time (or autosuspended) after a specified | ||
104 | period of inactivity and woken up in response to a hardware-generated | ||
105 | wake-up event or a driver's request. | ||
106 | |||
107 | Hardware support is generally required for this functionality to work | ||
108 | and the bus type drivers of the buses the devices are on are | ||
109 | responsible for the actual handling of the autosuspend requests and | ||
110 | wake-up events. | ||
111 | |||
112 | config PM | ||
113 | def_bool y | ||
114 | depends on PM_SLEEP || PM_RUNTIME | ||
115 | |||
116 | config PM_DEBUG | ||
117 | bool "Power Management Debug Support" | ||
118 | depends on PM | ||
119 | ---help--- | ||
120 | This option enables various debugging support in the Power Management | ||
121 | code. This is helpful when debugging and reporting PM bugs, like | ||
122 | suspend support. | ||
123 | |||
124 | config PM_VERBOSE | ||
125 | bool "Verbose Power Management debugging" | ||
126 | depends on PM_DEBUG | ||
127 | ---help--- | ||
128 | This option enables verbose messages from the Power Management code. | ||
129 | |||
130 | config PM_ADVANCED_DEBUG | ||
131 | bool "Extra PM attributes in sysfs for low-level debugging/testing" | ||
132 | depends on PM_DEBUG | ||
133 | ---help--- | ||
134 | Add extra sysfs attributes allowing one to access some Power Management | ||
135 | fields of device objects from user space. If you are not a kernel | ||
136 | developer interested in debugging/testing Power Management, say "no". | ||
137 | |||
138 | config PM_TEST_SUSPEND | ||
139 | bool "Test suspend/resume and wakealarm during bootup" | ||
140 | depends on SUSPEND && PM_DEBUG && RTC_CLASS=y | ||
141 | ---help--- | ||
142 | This option will let you suspend your machine during bootup, and | ||
143 | make it wake up a few seconds later using an RTC wakeup alarm. | ||
144 | Enable this with a kernel parameter like "test_suspend=mem". | ||
145 | |||
146 | You probably want to have your system's RTC driver statically | ||
147 | linked, ensuring that it's available when this test runs. | ||
148 | |||
149 | config CAN_PM_TRACE | ||
150 | def_bool y | ||
151 | depends on PM_DEBUG && PM_SLEEP | ||
152 | |||
153 | config PM_TRACE | ||
154 | bool | ||
155 | help | ||
156 | This enables code to save the last PM event point across | ||
157 | reboot. The architecture needs to support this, x86 for | ||
158 | example does by saving things in the RTC, see below. | ||
159 | |||
160 | The architecture specific code must provide the extern | ||
161 | functions from <linux/resume-trace.h> as well as the | ||
162 | <asm/resume-trace.h> header with a TRACE_RESUME() macro. | ||
163 | |||
164 | The way the information is presented is architecture- | ||
165 | dependent, x86 will print the information during a | ||
166 | late_initcall. | ||
167 | |||
168 | config PM_TRACE_RTC | ||
169 | bool "Suspend/resume event tracing" | ||
170 | depends on CAN_PM_TRACE | ||
171 | depends on X86 | ||
172 | select PM_TRACE | ||
173 | ---help--- | ||
174 | This enables some cheesy code to save the last PM event point in the | ||
175 | RTC across reboots, so that you can debug a machine that just hangs | ||
176 | during suspend (or more commonly, during resume). | ||
177 | |||
178 | To use this debugging feature you should attempt to suspend the | ||
179 | machine, reboot it and then run | ||
180 | |||
181 | dmesg -s 1000000 | grep 'hash matches' | ||
182 | |||
183 | CAUTION: this option will cause your machine's real-time clock to be | ||
184 | set to an invalid time after a resume. | ||
185 | |||
199 | config APM_EMULATION | 186 | config APM_EMULATION |
200 | tristate "Advanced Power Management Emulation" | 187 | tristate "Advanced Power Management Emulation" |
201 | depends on PM && SYS_SUPPORTS_APM_EMULATION | 188 | depends on PM && SYS_SUPPORTS_APM_EMULATION |
@@ -222,31 +209,11 @@ config APM_EMULATION | |||
222 | anything, try disabling/enabling this option (or disabling/enabling | 209 | anything, try disabling/enabling this option (or disabling/enabling |
223 | APM in your BIOS). | 210 | APM in your BIOS). |
224 | 211 | ||
225 | config PM_RUNTIME | ||
226 | bool "Run-time PM core functionality" | ||
227 | depends on PM | ||
228 | ---help--- | ||
229 | Enable functionality allowing I/O devices to be put into energy-saving | ||
230 | (low power) states at run time (or autosuspended) after a specified | ||
231 | period of inactivity and woken up in response to a hardware-generated | ||
232 | wake-up event or a driver's request. | ||
233 | |||
234 | Hardware support is generally required for this functionality to work | ||
235 | and the bus type drivers of the buses the devices are on are | ||
236 | responsible for the actual handling of the autosuspend requests and | ||
237 | wake-up events. | ||
238 | |||
239 | config PM_OPS | ||
240 | bool | ||
241 | depends on PM_SLEEP || PM_RUNTIME | ||
242 | default y | ||
243 | |||
244 | config ARCH_HAS_OPP | 212 | config ARCH_HAS_OPP |
245 | bool | 213 | bool |
246 | 214 | ||
247 | config PM_OPP | 215 | config PM_OPP |
248 | bool "Operating Performance Point (OPP) Layer library" | 216 | bool "Operating Performance Point (OPP) Layer library" |
249 | depends on PM | ||
250 | depends on ARCH_HAS_OPP | 217 | depends on ARCH_HAS_OPP |
251 | ---help--- | 218 | ---help--- |
252 | SOCs have a standard set of tuples consisting of frequency and | 219 | SOCs have a standard set of tuples consisting of frequency and |
diff --git a/kernel/power/Makefile b/kernel/power/Makefile index c350e18b53e3..c5ebc6a90643 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile | |||
@@ -1,4 +1,5 @@ | |||
1 | ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG | 1 | |
2 | ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG | ||
2 | 3 | ||
3 | obj-$(CONFIG_PM) += main.o | 4 | obj-$(CONFIG_PM) += main.o |
4 | obj-$(CONFIG_PM_SLEEP) += console.o | 5 | obj-$(CONFIG_PM_SLEEP) += console.o |
diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c index 83bbc7c02df9..d09dd10c5a5e 100644 --- a/kernel/power/block_io.c +++ b/kernel/power/block_io.c | |||
@@ -28,7 +28,7 @@ | |||
28 | static int submit(int rw, struct block_device *bdev, sector_t sector, | 28 | static int submit(int rw, struct block_device *bdev, sector_t sector, |
29 | struct page *page, struct bio **bio_chain) | 29 | struct page *page, struct bio **bio_chain) |
30 | { | 30 | { |
31 | const int bio_rw = rw | REQ_SYNC | REQ_UNPLUG; | 31 | const int bio_rw = rw | REQ_SYNC; |
32 | struct bio *bio; | 32 | struct bio *bio; |
33 | 33 | ||
34 | bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); | 34 | bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); |
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 1832bd264219..aeabd26e3342 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/cpu.h> | 23 | #include <linux/cpu.h> |
24 | #include <linux/freezer.h> | 24 | #include <linux/freezer.h> |
25 | #include <linux/gfp.h> | 25 | #include <linux/gfp.h> |
26 | #include <linux/syscore_ops.h> | ||
26 | #include <scsi/scsi_scan.h> | 27 | #include <scsi/scsi_scan.h> |
27 | #include <asm/suspend.h> | 28 | #include <asm/suspend.h> |
28 | 29 | ||
@@ -272,6 +273,8 @@ static int create_image(int platform_mode) | |||
272 | local_irq_disable(); | 273 | local_irq_disable(); |
273 | 274 | ||
274 | error = sysdev_suspend(PMSG_FREEZE); | 275 | error = sysdev_suspend(PMSG_FREEZE); |
276 | if (!error) | ||
277 | error = syscore_suspend(); | ||
275 | if (error) { | 278 | if (error) { |
276 | printk(KERN_ERR "PM: Some system devices failed to power down, " | 279 | printk(KERN_ERR "PM: Some system devices failed to power down, " |
277 | "aborting hibernation\n"); | 280 | "aborting hibernation\n"); |
@@ -295,6 +298,7 @@ static int create_image(int platform_mode) | |||
295 | } | 298 | } |
296 | 299 | ||
297 | Power_up: | 300 | Power_up: |
301 | syscore_resume(); | ||
298 | sysdev_resume(); | 302 | sysdev_resume(); |
299 | /* NOTE: dpm_resume_noirq() is just a resume() for devices | 303 | /* NOTE: dpm_resume_noirq() is just a resume() for devices |
300 | * that suspended with irqs off ... no overall powerup. | 304 | * that suspended with irqs off ... no overall powerup. |
@@ -403,6 +407,8 @@ static int resume_target_kernel(bool platform_mode) | |||
403 | local_irq_disable(); | 407 | local_irq_disable(); |
404 | 408 | ||
405 | error = sysdev_suspend(PMSG_QUIESCE); | 409 | error = sysdev_suspend(PMSG_QUIESCE); |
410 | if (!error) | ||
411 | error = syscore_suspend(); | ||
406 | if (error) | 412 | if (error) |
407 | goto Enable_irqs; | 413 | goto Enable_irqs; |
408 | 414 | ||
@@ -429,6 +435,7 @@ static int resume_target_kernel(bool platform_mode) | |||
429 | restore_processor_state(); | 435 | restore_processor_state(); |
430 | touch_softlockup_watchdog(); | 436 | touch_softlockup_watchdog(); |
431 | 437 | ||
438 | syscore_resume(); | ||
432 | sysdev_resume(); | 439 | sysdev_resume(); |
433 | 440 | ||
434 | Enable_irqs: | 441 | Enable_irqs: |
@@ -516,6 +523,7 @@ int hibernation_platform_enter(void) | |||
516 | 523 | ||
517 | local_irq_disable(); | 524 | local_irq_disable(); |
518 | sysdev_suspend(PMSG_HIBERNATE); | 525 | sysdev_suspend(PMSG_HIBERNATE); |
526 | syscore_suspend(); | ||
519 | if (pm_wakeup_pending()) { | 527 | if (pm_wakeup_pending()) { |
520 | error = -EAGAIN; | 528 | error = -EAGAIN; |
521 | goto Power_up; | 529 | goto Power_up; |
@@ -526,6 +534,7 @@ int hibernation_platform_enter(void) | |||
526 | while (1); | 534 | while (1); |
527 | 535 | ||
528 | Power_up: | 536 | Power_up: |
537 | syscore_resume(); | ||
529 | sysdev_resume(); | 538 | sysdev_resume(); |
530 | local_irq_enable(); | 539 | local_irq_enable(); |
531 | enable_nonboot_cpus(); | 540 | enable_nonboot_cpus(); |
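
hibernate.c (and suspend.c below) now bracket the low-level sleep transition with syscore_suspend()/syscore_resume(): syscore ops run as the very last step down and the very first step up, on one CPU with interrupts disabled, which is why a sysdev_suspend() failure skips them and why resume calls syscore_resume() before sysdev_resume(). A driver would presumably hook in like this (hypothetical driver; API per <linux/syscore_ops.h>):

/* Hypothetical driver: these callbacks run with irqs off on one CPU,
 * so no locking is needed inside them. */
#include <linux/init.h>
#include <linux/syscore_ops.h>

static int foo_syscore_suspend(void)
{
	/* save volatile hardware context; nonzero aborts the transition */
	return 0;
}

static void foo_syscore_resume(void)
{
	/* restore the context saved in foo_syscore_suspend() */
}

static struct syscore_ops foo_syscore_ops = {
	.suspend = foo_syscore_suspend,
	.resume  = foo_syscore_resume,
};

static int __init foo_init(void)
{
	register_syscore_ops(&foo_syscore_ops);
	return 0;
}
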
diff --git a/kernel/power/main.c b/kernel/power/main.c index 701853042c28..8eaba5f27b10 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c | |||
@@ -17,9 +17,6 @@ | |||
17 | 17 | ||
18 | DEFINE_MUTEX(pm_mutex); | 18 | DEFINE_MUTEX(pm_mutex); |
19 | 19 | ||
20 | unsigned int pm_flags; | ||
21 | EXPORT_SYMBOL(pm_flags); | ||
22 | |||
23 | #ifdef CONFIG_PM_SLEEP | 20 | #ifdef CONFIG_PM_SLEEP |
24 | 21 | ||
25 | /* Routines for PM-transition notifications */ | 22 | /* Routines for PM-transition notifications */ |
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 64db648ff911..ca0aacc24874 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
@@ -42,15 +42,15 @@ static void swsusp_unset_page_forbidden(struct page *); | |||
42 | 42 | ||
43 | /* | 43 | /* |
44 | * Preferred image size in bytes (tunable via /sys/power/image_size). | 44 | * Preferred image size in bytes (tunable via /sys/power/image_size). |
45 | * When it is set to N, swsusp will do its best to ensure the image | 45 | * When it is set to N, the image creating code will do its best to |
46 | * size will not exceed N bytes, but if that is impossible, it will | 46 | * ensure the image size will not exceed N bytes, but if that is |
47 | * try to create the smallest image possible. | 47 | * impossible, it will try to create the smallest image possible. |
48 | */ | 48 | */ |
49 | unsigned long image_size; | 49 | unsigned long image_size; |
50 | 50 | ||
51 | void __init hibernate_image_size_init(void) | 51 | void __init hibernate_image_size_init(void) |
52 | { | 52 | { |
53 | image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE; | 53 | image_size = (totalram_pages / 3) * PAGE_SIZE; |
54 | } | 54 | } |
55 | 55 | ||
56 | /* List of PBEs needed for restoring the pages that were allocated before | 56 | /* List of PBEs needed for restoring the pages that were allocated before |
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index de6f86bfa303..2814c32aed51 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/mm.h> | 22 | #include <linux/mm.h> |
23 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
24 | #include <linux/suspend.h> | 24 | #include <linux/suspend.h> |
25 | #include <linux/syscore_ops.h> | ||
25 | #include <trace/events/power.h> | 26 | #include <trace/events/power.h> |
26 | 27 | ||
27 | #include "power.h" | 28 | #include "power.h" |
@@ -163,11 +164,14 @@ static int suspend_enter(suspend_state_t state) | |||
163 | BUG_ON(!irqs_disabled()); | 164 | BUG_ON(!irqs_disabled()); |
164 | 165 | ||
165 | error = sysdev_suspend(PMSG_SUSPEND); | 166 | error = sysdev_suspend(PMSG_SUSPEND); |
167 | if (!error) | ||
168 | error = syscore_suspend(); | ||
166 | if (!error) { | 169 | if (!error) { |
167 | if (!(suspend_test(TEST_CORE) || pm_wakeup_pending())) { | 170 | if (!(suspend_test(TEST_CORE) || pm_wakeup_pending())) { |
168 | error = suspend_ops->enter(state); | 171 | error = suspend_ops->enter(state); |
169 | events_check_enabled = false; | 172 | events_check_enabled = false; |
170 | } | 173 | } |
174 | syscore_resume(); | ||
171 | sysdev_resume(); | 175 | sysdev_resume(); |
172 | } | 176 | } |
173 | 177 | ||
diff --git a/kernel/printk.c b/kernel/printk.c index 36231525e22f..da8ca817eae3 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -53,7 +53,7 @@ void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...) | |||
53 | #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) | 53 | #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) |
54 | 54 | ||
55 | /* printk's without a loglevel use this.. */ | 55 | /* printk's without a loglevel use this.. */ |
56 | #define DEFAULT_MESSAGE_LOGLEVEL 4 /* KERN_WARNING */ | 56 | #define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL |
57 | 57 | ||
58 | /* We show everything that is MORE important than this.. */ | 58 | /* We show everything that is MORE important than this.. */ |
59 | #define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */ | 59 | #define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */ |
@@ -113,6 +113,11 @@ static unsigned con_start; /* Index into log_buf: next char to be sent to consol | |||
113 | static unsigned log_end; /* Index into log_buf: most-recently-written-char + 1 */ | 113 | static unsigned log_end; /* Index into log_buf: most-recently-written-char + 1 */ |
114 | 114 | ||
115 | /* | 115 | /* |
116 | * If exclusive_console is non-NULL then only this console is to be printed to. | ||
117 | */ | ||
118 | static struct console *exclusive_console; | ||
119 | |||
120 | /* | ||
116 | * Array of consoles built from command line options (console=) | 121 | * Array of consoles built from command line options (console=) |
117 | */ | 122 | */ |
118 | struct console_cmdline | 123 | struct console_cmdline |
@@ -476,6 +481,8 @@ static void __call_console_drivers(unsigned start, unsigned end) | |||
476 | struct console *con; | 481 | struct console *con; |
477 | 482 | ||
478 | for_each_console(con) { | 483 | for_each_console(con) { |
484 | if (exclusive_console && con != exclusive_console) | ||
485 | continue; | ||
479 | if ((con->flags & CON_ENABLED) && con->write && | 486 | if ((con->flags & CON_ENABLED) && con->write && |
480 | (cpu_online(smp_processor_id()) || | 487 | (cpu_online(smp_processor_id()) || |
481 | (con->flags & CON_ANYTIME))) | 488 | (con->flags & CON_ANYTIME))) |
@@ -515,6 +522,71 @@ static void _call_console_drivers(unsigned start, | |||
515 | } | 522 | } |
516 | 523 | ||
517 | /* | 524 | /* |
525 | * Parse the syslog header <[0-9]*>. The decimal value is a 32-bit priority; | ||
526 | * the lower 3 bits are the log level, the rest is the log facility. In case | ||
527 | * userspace passes usual userspace syslog messages to /dev/kmsg or | ||
528 | * /dev/ttyprintk, the log prefix might contain the facility. Printk needs | ||
529 | * to extract the correct log level for in-kernel processing, and not mangle | ||
530 | * the original value. | ||
531 | * | ||
532 | * If a prefix is found, the length of the prefix is returned. If 'level' is | ||
533 | * passed, it will be filled in with the log level without a possible facility | ||
534 | * value. If 'special' is passed, the special printk prefix chars are accepted | ||
535 | * and returned. If no valid header is found, 0 is returned and the passed | ||
536 | * variables are not touched. | ||
537 | */ | ||
538 | static size_t log_prefix(const char *p, unsigned int *level, char *special) | ||
539 | { | ||
540 | unsigned int lev = 0; | ||
541 | char sp = '\0'; | ||
542 | size_t len; | ||
543 | |||
544 | if (p[0] != '<' || !p[1]) | ||
545 | return 0; | ||
546 | if (p[2] == '>') { | ||
547 | /* usual single digit level number or special char */ | ||
548 | switch (p[1]) { | ||
549 | case '0' ... '7': | ||
550 | lev = p[1] - '0'; | ||
551 | break; | ||
552 | case 'c': /* KERN_CONT */ | ||
553 | case 'd': /* KERN_DEFAULT */ | ||
554 | sp = p[1]; | ||
555 | break; | ||
556 | default: | ||
557 | return 0; | ||
558 | } | ||
559 | len = 3; | ||
560 | } else { | ||
561 | /* multi digit including the level and facility number */ | ||
562 | char *endp = NULL; | ||
563 | |||
564 | if (p[1] < '0' || p[1] > '9') | ||
565 | return 0; | ||
566 | |||
567 | lev = (simple_strtoul(&p[1], &endp, 10) & 7); | ||
568 | if (endp == NULL || endp[0] != '>') | ||
569 | return 0; | ||
570 | len = (endp + 1) - p; | ||
571 | } | ||
572 | |||
573 | /* do not accept special char if not asked for */ | ||
574 | if (sp && !special) | ||
575 | return 0; | ||
576 | |||
577 | if (special) { | ||
578 | *special = sp; | ||
579 | /* return special char, do not touch level */ | ||
580 | if (sp) | ||
581 | return len; | ||
582 | } | ||
583 | |||
584 | if (level) | ||
585 | *level = lev; | ||
586 | return len; | ||
587 | } | ||
588 | |||
589 | /* | ||
518 | * Call the console drivers, asking them to write out | 590 | * Call the console drivers, asking them to write out |
519 | * log_buf[start] to log_buf[end - 1]. | 591 | * log_buf[start] to log_buf[end - 1]. |
520 | * The console_lock must be held. | 592 | * The console_lock must be held. |
@@ -529,13 +601,9 @@ static void call_console_drivers(unsigned start, unsigned end) | |||
529 | cur_index = start; | 601 | cur_index = start; |
530 | start_print = start; | 602 | start_print = start; |
531 | while (cur_index != end) { | 603 | while (cur_index != end) { |
532 | if (msg_level < 0 && ((end - cur_index) > 2) && | 604 | if (msg_level < 0 && ((end - cur_index) > 2)) { |
533 | LOG_BUF(cur_index + 0) == '<' && | 605 | /* strip log prefix */ |
534 | LOG_BUF(cur_index + 1) >= '0' && | 606 | cur_index += log_prefix(&LOG_BUF(cur_index), &msg_level, NULL); |
535 | LOG_BUF(cur_index + 1) <= '7' && | ||
536 | LOG_BUF(cur_index + 2) == '>') { | ||
537 | msg_level = LOG_BUF(cur_index + 1) - '0'; | ||
538 | cur_index += 3; | ||
539 | start_print = cur_index; | 607 | start_print = cur_index; |
540 | } | 608 | } |
541 | while (cur_index != end) { | 609 | while (cur_index != end) { |
@@ -733,6 +801,8 @@ asmlinkage int vprintk(const char *fmt, va_list args) | |||
733 | unsigned long flags; | 801 | unsigned long flags; |
734 | int this_cpu; | 802 | int this_cpu; |
735 | char *p; | 803 | char *p; |
804 | size_t plen; | ||
805 | char special; | ||
736 | 806 | ||
737 | boot_delay_msec(); | 807 | boot_delay_msec(); |
738 | printk_delay(); | 808 | printk_delay(); |
@@ -773,45 +843,52 @@ asmlinkage int vprintk(const char *fmt, va_list args) | |||
773 | printed_len += vscnprintf(printk_buf + printed_len, | 843 | printed_len += vscnprintf(printk_buf + printed_len, |
774 | sizeof(printk_buf) - printed_len, fmt, args); | 844 | sizeof(printk_buf) - printed_len, fmt, args); |
775 | 845 | ||
776 | |||
777 | p = printk_buf; | 846 | p = printk_buf; |
778 | 847 | ||
779 | /* Do we have a loglevel in the string? */ | 848 | /* Read log level and handle special printk prefix */ |
780 | if (p[0] == '<') { | 849 | plen = log_prefix(p, ¤t_log_level, &special); |
781 | unsigned char c = p[1]; | 850 | if (plen) { |
782 | if (c && p[2] == '>') { | 851 | p += plen; |
783 | switch (c) { | 852 | |
784 | case '0' ... '7': /* loglevel */ | 853 | switch (special) { |
785 | current_log_level = c - '0'; | 854 | case 'c': /* Strip <c> KERN_CONT, continue line */ |
786 | /* Fallthrough - make sure we're on a new line */ | 855 | plen = 0; |
787 | case 'd': /* KERN_DEFAULT */ | 856 | break; |
788 | if (!new_text_line) { | 857 | case 'd': /* Strip <d> KERN_DEFAULT, start new line */ |
789 | emit_log_char('\n'); | 858 | plen = 0; |
790 | new_text_line = 1; | 859 | default: |
791 | } | 860 | if (!new_text_line) { |
792 | /* Fallthrough - skip the loglevel */ | 861 | emit_log_char('\n'); |
793 | case 'c': /* KERN_CONT */ | 862 | new_text_line = 1; |
794 | p += 3; | ||
795 | break; | ||
796 | } | 863 | } |
797 | } | 864 | } |
798 | } | 865 | } |
799 | 866 | ||
800 | /* | 867 | /* |
801 | * Copy the output into log_buf. If the caller didn't provide | 868 | * Copy the output into log_buf. If the caller didn't provide |
802 | * appropriate log level tags, we insert them here | 869 | * the appropriate log prefix, we insert them here |
803 | */ | 870 | */ |
804 | for ( ; *p; p++) { | 871 | for (; *p; p++) { |
805 | if (new_text_line) { | 872 | if (new_text_line) { |
806 | /* Always output the token */ | ||
807 | emit_log_char('<'); | ||
808 | emit_log_char(current_log_level + '0'); | ||
809 | emit_log_char('>'); | ||
810 | printed_len += 3; | ||
811 | new_text_line = 0; | 873 | new_text_line = 0; |
812 | 874 | ||
875 | if (plen) { | ||
876 | /* Copy original log prefix */ | ||
877 | int i; | ||
878 | |||
879 | for (i = 0; i < plen; i++) | ||
880 | emit_log_char(printk_buf[i]); | ||
881 | printed_len += plen; | ||
882 | } else { | ||
883 | /* Add log prefix */ | ||
884 | emit_log_char('<'); | ||
885 | emit_log_char(current_log_level + '0'); | ||
886 | emit_log_char('>'); | ||
887 | printed_len += 3; | ||
888 | } | ||
889 | |||
813 | if (printk_time) { | 890 | if (printk_time) { |
814 | /* Follow the token with the time */ | 891 | /* Add the current time stamp */ |
815 | char tbuf[50], *tp; | 892 | char tbuf[50], *tp; |
816 | unsigned tlen; | 893 | unsigned tlen; |
817 | unsigned long long t; | 894 | unsigned long long t; |
@@ -1160,6 +1237,11 @@ void console_unlock(void) | |||
1160 | local_irq_restore(flags); | 1237 | local_irq_restore(flags); |
1161 | } | 1238 | } |
1162 | console_locked = 0; | 1239 | console_locked = 0; |
1240 | |||
1241 | /* Release the exclusive_console once it is used */ | ||
1242 | if (unlikely(exclusive_console)) | ||
1243 | exclusive_console = NULL; | ||
1244 | |||
1163 | up(&console_sem); | 1245 | up(&console_sem); |
1164 | spin_unlock_irqrestore(&logbuf_lock, flags); | 1246 | spin_unlock_irqrestore(&logbuf_lock, flags); |
1165 | if (wake_klogd) | 1247 | if (wake_klogd) |
@@ -1246,6 +1328,18 @@ void console_start(struct console *console) | |||
1246 | } | 1328 | } |
1247 | EXPORT_SYMBOL(console_start); | 1329 | EXPORT_SYMBOL(console_start); |
1248 | 1330 | ||
1331 | static int __read_mostly keep_bootcon; | ||
1332 | |||
1333 | static int __init keep_bootcon_setup(char *str) | ||
1334 | { | ||
1335 | keep_bootcon = 1; | ||
1336 | printk(KERN_INFO "debug: skip boot console de-registration.\n"); | ||
1337 | |||
1338 | return 0; | ||
1339 | } | ||
1340 | |||
1341 | early_param("keep_bootcon", keep_bootcon_setup); | ||
1342 | |||
1249 | /* | 1343 | /* |
1250 | * The console driver calls this routine during kernel initialization | 1344 | * The console driver calls this routine during kernel initialization |
1251 | * to register the console printing procedure with printk() and to | 1345 | * to register the console printing procedure with printk() and to |
@@ -1382,6 +1476,12 @@ void register_console(struct console *newcon) | |||
1382 | spin_lock_irqsave(&logbuf_lock, flags); | 1476 | spin_lock_irqsave(&logbuf_lock, flags); |
1383 | con_start = log_start; | 1477 | con_start = log_start; |
1384 | spin_unlock_irqrestore(&logbuf_lock, flags); | 1478 | spin_unlock_irqrestore(&logbuf_lock, flags); |
1479 | /* | ||
1480 | * We're about to replay the log buffer. Only do this to the | ||
1481 | * just-registered console to avoid excessive message spam to | ||
1482 | * the already-registered consoles. | ||
1483 | */ | ||
1484 | exclusive_console = newcon; | ||
1385 | } | 1485 | } |
1386 | console_unlock(); | 1486 | console_unlock(); |
1387 | console_sysfs_notify(); | 1487 | console_sysfs_notify(); |
@@ -1393,7 +1493,9 @@ void register_console(struct console *newcon) | |||
1393 | * users know there might be something in the kernel's log buffer that | 1493 | * users know there might be something in the kernel's log buffer that |
1394 | * went to the bootconsole (that they do not see on the real console) | 1494 | * went to the bootconsole (that they do not see on the real console) |
1395 | */ | 1495 | */ |
1396 | if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) { | 1496 | if (bcon && |
1497 | ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV) && | ||
1498 | !keep_bootcon) { | ||
1397 | /* we need to iterate through twice, to make sure we print | 1499 | /* we need to iterate through twice, to make sure we print |
1398 | * everything out, before we unregister the console(s) | 1500 | * everything out, before we unregister the console(s) |
1399 | */ | 1501 | */ |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index e2302e40b360..0fc1eed28d27 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -134,21 +134,24 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode) | |||
134 | return 0; | 134 | return 0; |
135 | rcu_read_lock(); | 135 | rcu_read_lock(); |
136 | tcred = __task_cred(task); | 136 | tcred = __task_cred(task); |
137 | if ((cred->uid != tcred->euid || | 137 | if (cred->user->user_ns == tcred->user->user_ns && |
138 | cred->uid != tcred->suid || | 138 | (cred->uid == tcred->euid && |
139 | cred->uid != tcred->uid || | 139 | cred->uid == tcred->suid && |
140 | cred->gid != tcred->egid || | 140 | cred->uid == tcred->uid && |
141 | cred->gid != tcred->sgid || | 141 | cred->gid == tcred->egid && |
142 | cred->gid != tcred->gid) && | 142 | cred->gid == tcred->sgid && |
143 | !capable(CAP_SYS_PTRACE)) { | 143 | cred->gid == tcred->gid)) |
144 | rcu_read_unlock(); | 144 | goto ok; |
145 | return -EPERM; | 145 | if (ns_capable(tcred->user->user_ns, CAP_SYS_PTRACE)) |
146 | } | 146 | goto ok; |
147 | rcu_read_unlock(); | ||
148 | return -EPERM; | ||
149 | ok: | ||
147 | rcu_read_unlock(); | 150 | rcu_read_unlock(); |
148 | smp_rmb(); | 151 | smp_rmb(); |
149 | if (task->mm) | 152 | if (task->mm) |
150 | dumpable = get_dumpable(task->mm); | 153 | dumpable = get_dumpable(task->mm); |
151 | if (!dumpable && !capable(CAP_SYS_PTRACE)) | 154 | if (!dumpable && !task_ns_capable(task, CAP_SYS_PTRACE)) |
152 | return -EPERM; | 155 | return -EPERM; |
153 | 156 | ||
154 | return security_ptrace_access_check(task, mode); | 157 | return security_ptrace_access_check(task, mode); |
@@ -198,7 +201,7 @@ static int ptrace_attach(struct task_struct *task) | |||
198 | goto unlock_tasklist; | 201 | goto unlock_tasklist; |
199 | 202 | ||
200 | task->ptrace = PT_PTRACED; | 203 | task->ptrace = PT_PTRACED; |
201 | if (capable(CAP_SYS_PTRACE)) | 204 | if (task_ns_capable(task, CAP_SYS_PTRACE)) |
202 | task->ptrace |= PT_PTRACE_CAP; | 205 | task->ptrace |= PT_PTRACE_CAP; |
203 | 206 | ||
204 | __ptrace_link(task, current); | 207 | __ptrace_link(task, current); |
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index a23a57a976d1..f3240e987928 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
@@ -214,11 +214,12 @@ static int rcuhead_fixup_free(void *addr, enum debug_obj_state state) | |||
214 | * Ensure that queued callbacks are all executed. | 214 | * Ensure that queued callbacks are all executed. |
215 | * If we detect that we are nested in a RCU read-side critical | 215 | * If we detect that we are nested in a RCU read-side critical |
216 | * section, we should simply fail, otherwise we would deadlock. | 216 | * section, we should simply fail, otherwise we would deadlock. |
217 | * Note that the machinery to reliably determine whether | ||
218 | * or not we are in an RCU read-side critical section | ||
219 | * exists only in the preemptible RCU implementations | ||
220 | * (TINY_PREEMPT_RCU and TREE_PREEMPT_RCU), which is why | ||
221 | * DEBUG_OBJECTS_RCU_HEAD is disallowed if !PREEMPT. | ||
217 | */ | 222 | */ |
218 | #ifndef CONFIG_PREEMPT | ||
219 | WARN_ON(1); | ||
220 | return 0; | ||
221 | #else | ||
222 | if (rcu_preempt_depth() != 0 || preempt_count() != 0 || | 223 | if (rcu_preempt_depth() != 0 || preempt_count() != 0 || |
223 | irqs_disabled()) { | 224 | irqs_disabled()) { |
224 | WARN_ON(1); | 225 | WARN_ON(1); |
@@ -229,7 +230,6 @@ static int rcuhead_fixup_free(void *addr, enum debug_obj_state state) | |||
229 | rcu_barrier_bh(); | 230 | rcu_barrier_bh(); |
230 | debug_object_free(head, &rcuhead_debug_descr); | 231 | debug_object_free(head, &rcuhead_debug_descr); |
231 | return 1; | 232 | return 1; |
232 | #endif | ||
233 | default: | 233 | default: |
234 | return 0; | 234 | return 0; |
235 | } | 235 | } |
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index 015abaea962a..3cb8e362e883 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h | |||
@@ -852,7 +852,7 @@ void exit_rcu(void) | |||
852 | if (t->rcu_read_lock_nesting == 0) | 852 | if (t->rcu_read_lock_nesting == 0) |
853 | return; | 853 | return; |
854 | t->rcu_read_lock_nesting = 1; | 854 | t->rcu_read_lock_nesting = 1; |
855 | rcu_read_unlock(); | 855 | __rcu_read_unlock(); |
856 | } | 856 | } |
857 | 857 | ||
858 | #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ | 858 | #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ |
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 89613f97ff26..c224da41890c 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
@@ -47,7 +47,6 @@ | |||
47 | #include <linux/srcu.h> | 47 | #include <linux/srcu.h> |
48 | #include <linux/slab.h> | 48 | #include <linux/slab.h> |
49 | #include <asm/byteorder.h> | 49 | #include <asm/byteorder.h> |
50 | #include <linux/sched.h> | ||
51 | 50 | ||
52 | MODULE_LICENSE("GPL"); | 51 | MODULE_LICENSE("GPL"); |
53 | MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " | 52 | MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " |
diff --git a/kernel/res_counter.c b/kernel/res_counter.c index c7eaa37a768b..34683efa2cce 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c | |||
@@ -126,10 +126,24 @@ ssize_t res_counter_read(struct res_counter *counter, int member, | |||
126 | pos, buf, s - buf); | 126 | pos, buf, s - buf); |
127 | } | 127 | } |
128 | 128 | ||
129 | #if BITS_PER_LONG == 32 | ||
130 | u64 res_counter_read_u64(struct res_counter *counter, int member) | ||
131 | { | ||
132 | unsigned long flags; | ||
133 | u64 ret; | ||
134 | |||
135 | spin_lock_irqsave(&counter->lock, flags); | ||
136 | ret = *res_counter_member(counter, member); | ||
137 | spin_unlock_irqrestore(&counter->lock, flags); | ||
138 | |||
139 | return ret; | ||
140 | } | ||
141 | #else | ||
129 | u64 res_counter_read_u64(struct res_counter *counter, int member) | 142 | u64 res_counter_read_u64(struct res_counter *counter, int member) |
130 | { | 143 | { |
131 | return *res_counter_member(counter, member); | 144 | return *res_counter_member(counter, member); |
132 | } | 145 | } |
146 | #endif | ||
133 | 147 | ||
134 | int res_counter_memparse_write_strategy(const char *buf, | 148 | int res_counter_memparse_write_strategy(const char *buf, |
135 | unsigned long long *res) | 149 | unsigned long long *res) |
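The res_counter hunk exists because on a 32-bit machine a 64-bit read is two loads and can tear against a concurrent update, so the new #if branch takes counter->lock; on 64-bit a single aligned load is atomic and the lock is skipped. A userspace analogue of the same split, with hypothetical names (counter_t, counter_read_u64); the kernel relies on a single aligned load being atomic in the else branch:

#include <limits.h>
#include <pthread.h>
#include <stdint.h>

typedef struct {
	pthread_spinlock_t lock;	/* analogue of counter->lock */
	uint64_t usage;
} counter_t;

static uint64_t counter_read_u64(counter_t *c)
{
#if ULONG_MAX == 0xffffffffUL	/* 32-bit long: a u64 read is two loads */
	uint64_t ret;

	pthread_spin_lock(&c->lock);	/* exclude a concurrent update ... */
	ret = c->usage;			/* ... so the two halves cannot tear */
	pthread_spin_unlock(&c->lock);
	return ret;
#else				/* 64-bit long: one aligned load suffices */
	return c->usage;
#endif
}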
diff --git a/kernel/sched.c b/kernel/sched.c index a361e20ec2cd..f592ce6f8616 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -32,7 +32,6 @@ | |||
32 | #include <linux/init.h> | 32 | #include <linux/init.h> |
33 | #include <linux/uaccess.h> | 33 | #include <linux/uaccess.h> |
34 | #include <linux/highmem.h> | 34 | #include <linux/highmem.h> |
35 | #include <linux/smp_lock.h> | ||
36 | #include <asm/mmu_context.h> | 35 | #include <asm/mmu_context.h> |
37 | #include <linux/interrupt.h> | 36 | #include <linux/interrupt.h> |
38 | #include <linux/capability.h> | 37 | #include <linux/capability.h> |
@@ -4086,9 +4085,6 @@ need_resched: | |||
4086 | rcu_note_context_switch(cpu); | 4085 | rcu_note_context_switch(cpu); |
4087 | prev = rq->curr; | 4086 | prev = rq->curr; |
4088 | 4087 | ||
4089 | release_kernel_lock(prev); | ||
4090 | need_resched_nonpreemptible: | ||
4091 | |||
4092 | schedule_debug(prev); | 4088 | schedule_debug(prev); |
4093 | 4089 | ||
4094 | if (sched_feat(HRTICK)) | 4090 | if (sched_feat(HRTICK)) |
@@ -4119,6 +4115,16 @@ need_resched_nonpreemptible: | |||
4119 | switch_count = &prev->nvcsw; | 4115 | switch_count = &prev->nvcsw; |
4120 | } | 4116 | } |
4121 | 4117 | ||
4118 | /* | ||
4119 | * If we are going to sleep and we have plugged IO queued, make | ||
4120 | * sure to submit it to avoid deadlocks. | ||
4121 | */ | ||
4122 | if (prev->state != TASK_RUNNING && blk_needs_flush_plug(prev)) { | ||
4123 | raw_spin_unlock(&rq->lock); | ||
4124 | blk_flush_plug(prev); | ||
4125 | raw_spin_lock(&rq->lock); | ||
4126 | } | ||
4127 | |||
4122 | pre_schedule(rq, prev); | 4128 | pre_schedule(rq, prev); |
4123 | 4129 | ||
4124 | if (unlikely(!rq->nr_running)) | 4130 | if (unlikely(!rq->nr_running)) |
@@ -4148,9 +4154,6 @@ need_resched_nonpreemptible: | |||
4148 | 4154 | ||
4149 | post_schedule(rq); | 4155 | post_schedule(rq); |
4150 | 4156 | ||
4151 | if (unlikely(reacquire_kernel_lock(prev))) | ||
4152 | goto need_resched_nonpreemptible; | ||
4153 | |||
4154 | preempt_enable_no_resched(); | 4157 | preempt_enable_no_resched(); |
4155 | if (need_resched()) | 4158 | if (need_resched()) |
4156 | goto need_resched; | 4159 | goto need_resched; |
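The schedule() hunk above removes the BKL release/reacquire path (and its need_resched_nonpreemptible label) and instead flushes the outgoing task's plugged block I/O before it sleeps; otherwise the task could block waiting on requests it has itself held back. Note the runqueue lock is dropped around blk_flush_plug() because flushing may block. A userspace analogue of that drop-call-relock shape, with placeholder names (flush_pending_io, prepare_to_sleep):

#include <pthread.h>

static pthread_mutex_t rq_lock = PTHREAD_MUTEX_INITIALIZER;

static void flush_pending_io(void)	/* stand-in for blk_flush_plug() */
{
	/* submit whatever this task queued but has not yet issued */
}

static void prepare_to_sleep(int will_block)
{
	pthread_mutex_lock(&rq_lock);
	if (will_block) {
		/* never call something that can block while holding the
		 * runqueue-style lock: drop it, flush, retake it */
		pthread_mutex_unlock(&rq_lock);
		flush_pending_io();
		pthread_mutex_lock(&rq_lock);
		/* state guarded by rq_lock may have changed here */
	}
	pthread_mutex_unlock(&rq_lock);
}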
@@ -4899,8 +4902,11 @@ static bool check_same_owner(struct task_struct *p) | |||
4899 | 4902 | ||
4900 | rcu_read_lock(); | 4903 | rcu_read_lock(); |
4901 | pcred = __task_cred(p); | 4904 | pcred = __task_cred(p); |
4902 | match = (cred->euid == pcred->euid || | 4905 | if (cred->user->user_ns == pcred->user->user_ns) |
4903 | cred->euid == pcred->uid); | 4906 | match = (cred->euid == pcred->euid || |
4907 | cred->euid == pcred->uid); | ||
4908 | else | ||
4909 | match = false; | ||
4904 | rcu_read_unlock(); | 4910 | rcu_read_unlock(); |
4905 | return match; | 4911 | return match; |
4906 | } | 4912 | } |
@@ -5228,7 +5234,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) | |||
5228 | goto out_free_cpus_allowed; | 5234 | goto out_free_cpus_allowed; |
5229 | } | 5235 | } |
5230 | retval = -EPERM; | 5236 | retval = -EPERM; |
5231 | if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) | 5237 | if (!check_same_owner(p) && !task_ns_capable(p, CAP_SYS_NICE)) |
5232 | goto out_unlock; | 5238 | goto out_unlock; |
5233 | 5239 | ||
5234 | retval = security_task_setscheduler(p); | 5240 | retval = security_task_setscheduler(p); |
@@ -5534,6 +5540,7 @@ void __sched io_schedule(void) | |||
5534 | 5540 | ||
5535 | delayacct_blkio_start(); | 5541 | delayacct_blkio_start(); |
5536 | atomic_inc(&rq->nr_iowait); | 5542 | atomic_inc(&rq->nr_iowait); |
5543 | blk_flush_plug(current); | ||
5537 | current->in_iowait = 1; | 5544 | current->in_iowait = 1; |
5538 | schedule(); | 5545 | schedule(); |
5539 | current->in_iowait = 0; | 5546 | current->in_iowait = 0; |
@@ -5549,6 +5556,7 @@ long __sched io_schedule_timeout(long timeout) | |||
5549 | 5556 | ||
5550 | delayacct_blkio_start(); | 5557 | delayacct_blkio_start(); |
5551 | atomic_inc(&rq->nr_iowait); | 5558 | atomic_inc(&rq->nr_iowait); |
5559 | blk_flush_plug(current); | ||
5552 | current->in_iowait = 1; | 5560 | current->in_iowait = 1; |
5553 | ret = schedule_timeout(timeout); | 5561 | ret = schedule_timeout(timeout); |
5554 | current->in_iowait = 0; | 5562 | current->in_iowait = 0; |
@@ -8279,7 +8287,7 @@ static inline int preempt_count_equals(int preempt_offset) | |||
8279 | { | 8287 | { |
8280 | int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth(); | 8288 | int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth(); |
8281 | 8289 | ||
8282 | return (nested == PREEMPT_INATOMIC_BASE + preempt_offset); | 8290 | return (nested == preempt_offset); |
8283 | } | 8291 | } |
8284 | 8292 | ||
8285 | void __might_sleep(const char *file, int line, int preempt_offset) | 8293 | void __might_sleep(const char *file, int line, int preempt_offset) |
diff --git a/kernel/signal.c b/kernel/signal.c index 4e3cff10fdce..324eff5468ad 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -636,13 +636,33 @@ static inline bool si_fromuser(const struct siginfo *info) | |||
636 | } | 636 | } |
637 | 637 | ||
638 | /* | 638 | /* |
639 | * called with RCU read lock from check_kill_permission() | ||
640 | */ | ||
641 | static int kill_ok_by_cred(struct task_struct *t) | ||
642 | { | ||
643 | const struct cred *cred = current_cred(); | ||
644 | const struct cred *tcred = __task_cred(t); | ||
645 | |||
646 | if (cred->user->user_ns == tcred->user->user_ns && | ||
647 | (cred->euid == tcred->suid || | ||
648 | cred->euid == tcred->uid || | ||
649 | cred->uid == tcred->suid || | ||
650 | cred->uid == tcred->uid)) | ||
651 | return 1; | ||
652 | |||
653 | if (ns_capable(tcred->user->user_ns, CAP_KILL)) | ||
654 | return 1; | ||
655 | |||
656 | return 0; | ||
657 | } | ||
658 | |||
659 | /* | ||
639 | * Bad permissions for sending the signal | 660 | * Bad permissions for sending the signal |
640 | * - the caller must hold the RCU read lock | 661 | * - the caller must hold the RCU read lock |
641 | */ | 662 | */ |
642 | static int check_kill_permission(int sig, struct siginfo *info, | 663 | static int check_kill_permission(int sig, struct siginfo *info, |
643 | struct task_struct *t) | 664 | struct task_struct *t) |
644 | { | 665 | { |
645 | const struct cred *cred, *tcred; | ||
646 | struct pid *sid; | 666 | struct pid *sid; |
647 | int error; | 667 | int error; |
648 | 668 | ||
@@ -656,14 +676,8 @@ static int check_kill_permission(int sig, struct siginfo *info, | |||
656 | if (error) | 676 | if (error) |
657 | return error; | 677 | return error; |
658 | 678 | ||
659 | cred = current_cred(); | ||
660 | tcred = __task_cred(t); | ||
661 | if (!same_thread_group(current, t) && | 679 | if (!same_thread_group(current, t) && |
662 | (cred->euid ^ tcred->suid) && | 680 | !kill_ok_by_cred(t)) { |
663 | (cred->euid ^ tcred->uid) && | ||
664 | (cred->uid ^ tcred->suid) && | ||
665 | (cred->uid ^ tcred->uid) && | ||
666 | !capable(CAP_KILL)) { | ||
667 | switch (sig) { | 681 | switch (sig) { |
668 | case SIGCONT: | 682 | case SIGCONT: |
669 | sid = task_session(t); | 683 | sid = task_session(t); |
@@ -2421,9 +2435,13 @@ SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig, | |||
2421 | return -EFAULT; | 2435 | return -EFAULT; |
2422 | 2436 | ||
2423 | /* Not even root can pretend to send signals from the kernel. | 2437 | /* Not even root can pretend to send signals from the kernel. |
2424 | Nor can they impersonate a kill(), which adds source info. */ | 2438 | * Nor can they impersonate a kill()/tgkill(), which adds source info. |
2425 | if (info.si_code >= 0) | 2439 | */ |
2440 | if (info.si_code != SI_QUEUE) { | ||
2441 | /* We used to allow any < 0 si_code */ | ||
2442 | WARN_ON_ONCE(info.si_code < 0); | ||
2426 | return -EPERM; | 2443 | return -EPERM; |
2444 | } | ||
2427 | info.si_signo = sig; | 2445 | info.si_signo = sig; |
2428 | 2446 | ||
2429 | /* POSIX.1b doesn't mention process groups. */ | 2447 | /* POSIX.1b doesn't mention process groups. */ |
@@ -2437,9 +2455,13 @@ long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info) | |||
2437 | return -EINVAL; | 2455 | return -EINVAL; |
2438 | 2456 | ||
2439 | /* Not even root can pretend to send signals from the kernel. | 2457 | /* Not even root can pretend to send signals from the kernel. |
2440 | Nor can they impersonate a kill(), which adds source info. */ | 2458 | * Nor can they impersonate a kill()/tgkill(), which adds source info. |
2441 | if (info->si_code >= 0) | 2459 | */ |
2460 | if (info->si_code != SI_QUEUE) { | ||
2461 | /* We used to allow any < 0 si_code */ | ||
2462 | WARN_ON_ONCE(info->si_code < 0); | ||
2442 | return -EPERM; | 2463 | return -EPERM; |
2464 | } | ||
2443 | info->si_signo = sig; | 2465 | info->si_signo = sig; |
2444 | 2466 | ||
2445 | return do_send_specific(tgid, pid, sig, info); | 2467 | return do_send_specific(tgid, pid, sig, info); |
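Both si_code hunks replace "reject si_code >= 0" with "reject anything but SI_QUEUE", warning once if a formerly tolerated negative value shows up. glibc's sigqueue() already sends SI_QUEUE, so it is unaffected; a raw rt_sigqueueinfo with a forged code now gets -EPERM. A small userspace probe, assuming Linux and glibc (signal 0 makes it a permission check only, nothing is delivered):

#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	siginfo_t info;

	memset(&info, 0, sizeof(info));
	info.si_code = SI_QUEUE;	/* the only value still accepted */
	info.si_pid  = getpid();
	info.si_uid  = getuid();
	info.si_value.sival_int = 42;

	/* any si_code other than SI_QUEUE now fails with EPERM */
	if (syscall(SYS_rt_sigqueueinfo, getpid(), 0, &info) != 0)
		perror("rt_sigqueueinfo");
	return 0;
}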
diff --git a/kernel/smp.c b/kernel/smp.c index 9910744f0856..73a195193558 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
@@ -194,7 +194,7 @@ void generic_smp_call_function_interrupt(void) | |||
194 | */ | 194 | */ |
195 | list_for_each_entry_rcu(data, &call_function.queue, csd.list) { | 195 | list_for_each_entry_rcu(data, &call_function.queue, csd.list) { |
196 | int refs; | 196 | int refs; |
197 | void (*func) (void *info); | 197 | smp_call_func_t func; |
198 | 198 | ||
199 | /* | 199 | /* |
200 | * Since we walk the list without any locks, we might | 200 | * Since we walk the list without any locks, we might |
@@ -214,17 +214,17 @@ void generic_smp_call_function_interrupt(void) | |||
214 | if (atomic_read(&data->refs) == 0) | 214 | if (atomic_read(&data->refs) == 0) |
215 | continue; | 215 | continue; |
216 | 216 | ||
217 | func = data->csd.func; /* for later warn */ | 217 | func = data->csd.func; /* save for later warn */ |
218 | data->csd.func(data->csd.info); | 218 | func(data->csd.info); |
219 | 219 | ||
220 | /* | 220 | /* |
221 | * If the cpu mask is not still set then it enabled interrupts, | 221 | * If the cpu mask is not still set then func enabled |
222 | * we took another smp interrupt, and executed the function | 222 | * interrupts (BUG), and this cpu took another smp call |
223 | * twice on this cpu. In theory that copy decremented refs. | 223 | * function interrupt and executed func(info) twice |
224 | * on this cpu. That nested execution decremented refs. | ||
224 | */ | 225 | */ |
225 | if (!cpumask_test_and_clear_cpu(cpu, data->cpumask)) { | 226 | if (!cpumask_test_and_clear_cpu(cpu, data->cpumask)) { |
226 | WARN(1, "%pS enabled interrupts and double executed\n", | 227 | WARN(1, "%pf enabled interrupts and double executed\n", func); |
227 | func); | ||
228 | continue; | 228 | continue; |
229 | } | 229 | } |
230 | 230 | ||
@@ -450,7 +450,7 @@ void smp_call_function_many(const struct cpumask *mask, | |||
450 | { | 450 | { |
451 | struct call_function_data *data; | 451 | struct call_function_data *data; |
452 | unsigned long flags; | 452 | unsigned long flags; |
453 | int cpu, next_cpu, this_cpu = smp_processor_id(); | 453 | int refs, cpu, next_cpu, this_cpu = smp_processor_id(); |
454 | 454 | ||
455 | /* | 455 | /* |
456 | * Can deadlock when called with interrupts disabled. | 456 | * Can deadlock when called with interrupts disabled. |
@@ -461,7 +461,7 @@ void smp_call_function_many(const struct cpumask *mask, | |||
461 | WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled() | 461 | WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled() |
462 | && !oops_in_progress && !early_boot_irqs_disabled); | 462 | && !oops_in_progress && !early_boot_irqs_disabled); |
463 | 463 | ||
464 | /* So, what's a CPU they want? Ignoring this one. */ | 464 | /* Try to fastpath. So, what's a CPU they want? Ignoring this one. */ |
465 | cpu = cpumask_first_and(mask, cpu_online_mask); | 465 | cpu = cpumask_first_and(mask, cpu_online_mask); |
466 | if (cpu == this_cpu) | 466 | if (cpu == this_cpu) |
467 | cpu = cpumask_next_and(cpu, mask, cpu_online_mask); | 467 | cpu = cpumask_next_and(cpu, mask, cpu_online_mask); |
@@ -483,22 +483,49 @@ void smp_call_function_many(const struct cpumask *mask, | |||
483 | 483 | ||
484 | data = &__get_cpu_var(cfd_data); | 484 | data = &__get_cpu_var(cfd_data); |
485 | csd_lock(&data->csd); | 485 | csd_lock(&data->csd); |
486 | |||
487 | /* This BUG_ON verifies our reuse assertions and can be removed */ | ||
486 | BUG_ON(atomic_read(&data->refs) || !cpumask_empty(data->cpumask)); | 488 | BUG_ON(atomic_read(&data->refs) || !cpumask_empty(data->cpumask)); |
487 | 489 | ||
490 | /* | ||
491 | * The global call function queue list add and delete are protected | ||
492 | * by a lock, but the list is traversed without any lock, relying | ||
493 | * on the rcu list add and delete to allow safe concurrent traversal. | ||
494 | * We reuse the call function data without waiting for any grace | ||
495 | * period after some other cpu removes it from the global queue. | ||
496 | * This means a cpu might find our data block as it is being | ||
497 | * filled out. | ||
498 | * | ||
499 | * We hold off the interrupt handler on the other cpu by | ||
500 | * ordering our writes to the cpu mask vs our setting of the | ||
501 | * refs counter. We assert only the cpu owning the data block | ||
502 | * will set a bit in cpumask, and each bit will only be cleared | ||
503 | * by the subject cpu. Each cpu must first find its bit is | ||
504 | * set and then check that refs is set indicating the element is | ||
505 | * ready to be processed, otherwise it must skip the entry. | ||
506 | * | ||
507 | * On the previous iteration refs was set to 0 by another cpu. | ||
508 | * To avoid the use of transitivity, set the counter to 0 here | ||
509 | * so the wmb will pair with the rmb in the interrupt handler. | ||
510 | */ | ||
511 | atomic_set(&data->refs, 0); /* convert 3rd to 1st party write */ | ||
512 | |||
488 | data->csd.func = func; | 513 | data->csd.func = func; |
489 | data->csd.info = info; | 514 | data->csd.info = info; |
490 | cpumask_and(data->cpumask, mask, cpu_online_mask); | ||
491 | cpumask_clear_cpu(this_cpu, data->cpumask); | ||
492 | 515 | ||
493 | /* | 516 | /* Ensure 0 refs is visible before mask. Also orders func and info */ |
494 | * To ensure the interrupt handler gets a complete view | ||
495 | * we order the cpumask and refs writes and order the read | ||
496 | * of them in the interrupt handler. In addition we may | ||
497 | * only clear our own cpu bit from the mask. | ||
498 | */ | ||
499 | smp_wmb(); | 517 | smp_wmb(); |
500 | 518 | ||
501 | atomic_set(&data->refs, cpumask_weight(data->cpumask)); | 519 | /* We rely on the "and" being processed before the store */ |
520 | cpumask_and(data->cpumask, mask, cpu_online_mask); | ||
521 | cpumask_clear_cpu(this_cpu, data->cpumask); | ||
522 | refs = cpumask_weight(data->cpumask); | ||
523 | |||
524 | /* Some callers race with other cpus changing the passed mask */ | ||
525 | if (unlikely(!refs)) { | ||
526 | csd_unlock(&data->csd); | ||
527 | return; | ||
528 | } | ||
502 | 529 | ||
503 | raw_spin_lock_irqsave(&call_function.lock, flags); | 530 | raw_spin_lock_irqsave(&call_function.lock, flags); |
504 | /* | 531 | /* |
@@ -507,6 +534,12 @@ void smp_call_function_many(const struct cpumask *mask, | |||
507 | * will not miss any other list entries: | 534 | * will not miss any other list entries: |
508 | */ | 535 | */ |
509 | list_add_rcu(&data->csd.list, &call_function.queue); | 536 | list_add_rcu(&data->csd.list, &call_function.queue); |
537 | /* | ||
538 | * We rely on the wmb() in list_add_rcu to complete our writes | ||
539 | * to the cpumask before this write to refs, which indicates | ||
540 | * data is on the list and is ready to be processed. | ||
541 | */ | ||
542 | atomic_set(&data->refs, refs); | ||
510 | raw_spin_unlock_irqrestore(&call_function.lock, flags); | 543 | raw_spin_unlock_irqrestore(&call_function.lock, flags); |
511 | 544 | ||
512 | /* | 545 | /* |
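The comments added in this hunk describe a classic publish pattern: zero the ready count, fill the payload (func, info, cpumask), then set refs last, with barriers so a racing interrupt handler that observes refs != 0 is guaranteed to see the payload. A simplified C11 analogue, assuming a single writer per block as the BUG_ON asserts; the real path is subtler because the block is reused without an RCU grace period:

#include <stdatomic.h>
#include <stdint.h>

struct call_data {
	uint64_t mask;		/* payload: cpus that must run func */
	atomic_int refs;	/* 0 = being filled, >0 = ready     */
};

/* writer: the cpu that owns the block (cf. smp_call_function_many) */
static void publish(struct call_data *d, uint64_t mask, int ncpus)
{
	atomic_store_explicit(&d->refs, 0, memory_order_relaxed);
	d->mask = mask;		/* fill the payload first ...       */
	/* ... release orders the payload write before the refs write,
	 * pairing with the reader's acquire (kernel: wmb/rmb pair)   */
	atomic_store_explicit(&d->refs, ncpus, memory_order_release);
}

/* reader: the interrupt handler walking the queue */
static int ready(struct call_data *d, uint64_t my_bit)
{
	if (atomic_load_explicit(&d->refs, memory_order_acquire) == 0)
		return 0;	/* still being filled: skip the entry */
	return (d->mask & my_bit) != 0;	/* payload now safe to read   */
}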
@@ -571,6 +604,87 @@ void ipi_call_unlock_irq(void) | |||
571 | } | 604 | } |
572 | #endif /* USE_GENERIC_SMP_HELPERS */ | 605 | #endif /* USE_GENERIC_SMP_HELPERS */ |
573 | 606 | ||
607 | /* Setup configured maximum number of CPUs to activate */ | ||
608 | unsigned int setup_max_cpus = NR_CPUS; | ||
609 | EXPORT_SYMBOL(setup_max_cpus); | ||
610 | |||
611 | |||
612 | /* | ||
613 | * Setup routine for controlling SMP activation | ||
614 | * | ||
615 | * Command-line option of "nosmp" or "maxcpus=0" will disable SMP | ||
616 | * activation entirely (the MPS table probe still happens, though). | ||
617 | * | ||
618 | * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer | ||
619 | * greater than 0, limits the maximum number of CPUs activated in | ||
620 | * SMP mode to <NUM>. | ||
621 | */ | ||
622 | |||
623 | void __weak arch_disable_smp_support(void) { } | ||
624 | |||
625 | static int __init nosmp(char *str) | ||
626 | { | ||
627 | setup_max_cpus = 0; | ||
628 | arch_disable_smp_support(); | ||
629 | |||
630 | return 0; | ||
631 | } | ||
632 | |||
633 | early_param("nosmp", nosmp); | ||
634 | |||
635 | /* this is the hard limit */ | ||
636 | static int __init nrcpus(char *str) | ||
637 | { | ||
638 | int nr_cpus; | ||
639 | |||
640 | get_option(&str, &nr_cpus); | ||
641 | if (nr_cpus > 0 && nr_cpus < nr_cpu_ids) | ||
642 | nr_cpu_ids = nr_cpus; | ||
643 | |||
644 | return 0; | ||
645 | } | ||
646 | |||
647 | early_param("nr_cpus", nrcpus); | ||
648 | |||
649 | static int __init maxcpus(char *str) | ||
650 | { | ||
651 | get_option(&str, &setup_max_cpus); | ||
652 | if (setup_max_cpus == 0) | ||
653 | arch_disable_smp_support(); | ||
654 | |||
655 | return 0; | ||
656 | } | ||
657 | |||
658 | early_param("maxcpus", maxcpus); | ||
659 | |||
660 | /* Setup number of possible processor ids */ | ||
661 | int nr_cpu_ids __read_mostly = NR_CPUS; | ||
662 | EXPORT_SYMBOL(nr_cpu_ids); | ||
663 | |||
664 | /* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */ | ||
665 | void __init setup_nr_cpu_ids(void) | ||
666 | { | ||
667 | nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1; | ||
668 | } | ||
669 | |||
670 | /* Called by boot processor to activate the rest. */ | ||
671 | void __init smp_init(void) | ||
672 | { | ||
673 | unsigned int cpu; | ||
674 | |||
675 | /* FIXME: This should be done in userspace --RR */ | ||
676 | for_each_present_cpu(cpu) { | ||
677 | if (num_online_cpus() >= setup_max_cpus) | ||
678 | break; | ||
679 | if (!cpu_online(cpu)) | ||
680 | cpu_up(cpu); | ||
681 | } | ||
682 | |||
683 | /* Any cleanup work */ | ||
684 | printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus()); | ||
685 | smp_cpus_done(setup_max_cpus); | ||
686 | } | ||
687 | |||
574 | /* | 688 | /* |
575 | * Call a function on all processors. May be used during early boot while | 689 | * Call a function on all processors. May be used during early boot while |
576 | * early_boot_irqs_disabled is set. Use local_irq_save/restore() instead | 690 | * early_boot_irqs_disabled is set. Use local_irq_save/restore() instead |
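The two early params moved into kernel/smp.c above differ in kind: "maxcpus=N" only caps how many present CPUs smp_init() brings online (a soft limit), while "nr_cpus=N" shrinks nr_cpu_ids itself, so per-cpu state beyond N is never sized at all (a hard limit). Both share the same get_option() parsing shape; a sketch with a hypothetical knob (my_limit is not from the diff):

static int my_limit = -1;	/* hypothetical knob for illustration */

static int __init my_limit_setup(char *str)
{
	int val;

	get_option(&str, &val);	/* parse the leading decimal integer */
	if (val > 0)
		my_limit = val;
	return 0;		/* 0 = parsed successfully */
}
early_param("my_limit", my_limit_setup);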
diff --git a/kernel/softirq.c b/kernel/softirq.c index 56e5dec837f0..735d87095172 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -845,7 +845,10 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, | |||
845 | switch (action) { | 845 | switch (action) { |
846 | case CPU_UP_PREPARE: | 846 | case CPU_UP_PREPARE: |
847 | case CPU_UP_PREPARE_FROZEN: | 847 | case CPU_UP_PREPARE_FROZEN: |
848 | p = kthread_create(run_ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu); | 848 | p = kthread_create_on_node(run_ksoftirqd, |
849 | hcpu, | ||
850 | cpu_to_node(hotcpu), | ||
851 | "ksoftirqd/%d", hotcpu); | ||
849 | if (IS_ERR(p)) { | 852 | if (IS_ERR(p)) { |
850 | printk("ksoftirqd for %i failed\n", hotcpu); | 853 | printk("ksoftirqd for %i failed\n", hotcpu); |
851 | return notifier_from_errno(PTR_ERR(p)); | 854 | return notifier_from_errno(PTR_ERR(p)); |
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 2df820b03beb..e3516b29076c 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
@@ -301,8 +301,10 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb, | |||
301 | case CPU_UP_PREPARE: | 301 | case CPU_UP_PREPARE: |
302 | BUG_ON(stopper->thread || stopper->enabled || | 302 | BUG_ON(stopper->thread || stopper->enabled || |
303 | !list_empty(&stopper->works)); | 303 | !list_empty(&stopper->works)); |
304 | p = kthread_create(cpu_stopper_thread, stopper, "migration/%d", | 304 | p = kthread_create_on_node(cpu_stopper_thread, |
305 | cpu); | 305 | stopper, |
306 | cpu_to_node(cpu), | ||
307 | "migration/%d", cpu); | ||
306 | if (IS_ERR(p)) | 308 | if (IS_ERR(p)) |
307 | return notifier_from_errno(PTR_ERR(p)); | 309 | return notifier_from_errno(PTR_ERR(p)); |
308 | get_task_struct(p); | 310 | get_task_struct(p); |
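The ksoftirqd and cpu_stopper hunks make the same change: per-cpu helper threads are created with kthread_create_on_node() so the thread's task_struct and stack are allocated on the memory node of the CPU they will serve, before kthread_bind() pins them there. The generic shape, with placeholder thread function, data, and name (my_thread_fn, my_data, "my_helper"):

struct task_struct *p;

p = kthread_create_on_node(my_thread_fn, my_data,
			   cpu_to_node(cpu),	/* allocate near the cpu */
			   "my_helper/%d", cpu);
if (IS_ERR(p))
	return notifier_from_errno(PTR_ERR(p));
kthread_bind(p, cpu);				/* then pin execution there */
wake_up_process(p);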
diff --git a/kernel/sys.c b/kernel/sys.c index 18da702ec813..af468edf096a 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/ptrace.h> | 37 | #include <linux/ptrace.h> |
38 | #include <linux/fs_struct.h> | 38 | #include <linux/fs_struct.h> |
39 | #include <linux/gfp.h> | 39 | #include <linux/gfp.h> |
40 | #include <linux/syscore_ops.h> | ||
40 | 41 | ||
41 | #include <linux/compat.h> | 42 | #include <linux/compat.h> |
42 | #include <linux/syscalls.h> | 43 | #include <linux/syscalls.h> |
@@ -119,16 +120,33 @@ EXPORT_SYMBOL(cad_pid); | |||
119 | void (*pm_power_off_prepare)(void); | 120 | void (*pm_power_off_prepare)(void); |
120 | 121 | ||
121 | /* | 122 | /* |
123 | * Returns true if current's euid is same as p's uid or euid, | ||
124 | * or has CAP_SYS_NICE to p's user_ns. | ||
125 | * | ||
126 | * Called with rcu_read_lock, creds are safe | ||
127 | */ | ||
128 | static bool set_one_prio_perm(struct task_struct *p) | ||
129 | { | ||
130 | const struct cred *cred = current_cred(), *pcred = __task_cred(p); | ||
131 | |||
132 | if (pcred->user->user_ns == cred->user->user_ns && | ||
133 | (pcred->uid == cred->euid || | ||
134 | pcred->euid == cred->euid)) | ||
135 | return true; | ||
136 | if (ns_capable(pcred->user->user_ns, CAP_SYS_NICE)) | ||
137 | return true; | ||
138 | return false; | ||
139 | } | ||
140 | |||
141 | /* | ||
122 | * set the priority of a task | 142 | * set the priority of a task |
123 | * - the caller must hold the RCU read lock | 143 | * - the caller must hold the RCU read lock |
124 | */ | 144 | */ |
125 | static int set_one_prio(struct task_struct *p, int niceval, int error) | 145 | static int set_one_prio(struct task_struct *p, int niceval, int error) |
126 | { | 146 | { |
127 | const struct cred *cred = current_cred(), *pcred = __task_cred(p); | ||
128 | int no_nice; | 147 | int no_nice; |
129 | 148 | ||
130 | if (pcred->uid != cred->euid && | 149 | if (!set_one_prio_perm(p)) { |
131 | pcred->euid != cred->euid && !capable(CAP_SYS_NICE)) { | ||
132 | error = -EPERM; | 150 | error = -EPERM; |
133 | goto out; | 151 | goto out; |
134 | } | 152 | } |
@@ -298,6 +316,7 @@ void kernel_restart_prepare(char *cmd) | |||
298 | system_state = SYSTEM_RESTART; | 316 | system_state = SYSTEM_RESTART; |
299 | device_shutdown(); | 317 | device_shutdown(); |
300 | sysdev_shutdown(); | 318 | sysdev_shutdown(); |
319 | syscore_shutdown(); | ||
301 | } | 320 | } |
302 | 321 | ||
303 | /** | 322 | /** |
@@ -336,6 +355,7 @@ void kernel_halt(void) | |||
336 | { | 355 | { |
337 | kernel_shutdown_prepare(SYSTEM_HALT); | 356 | kernel_shutdown_prepare(SYSTEM_HALT); |
338 | sysdev_shutdown(); | 357 | sysdev_shutdown(); |
358 | syscore_shutdown(); | ||
339 | printk(KERN_EMERG "System halted.\n"); | 359 | printk(KERN_EMERG "System halted.\n"); |
340 | kmsg_dump(KMSG_DUMP_HALT); | 360 | kmsg_dump(KMSG_DUMP_HALT); |
341 | machine_halt(); | 361 | machine_halt(); |
@@ -355,6 +375,7 @@ void kernel_power_off(void) | |||
355 | pm_power_off_prepare(); | 375 | pm_power_off_prepare(); |
356 | disable_nonboot_cpus(); | 376 | disable_nonboot_cpus(); |
357 | sysdev_shutdown(); | 377 | sysdev_shutdown(); |
378 | syscore_shutdown(); | ||
358 | printk(KERN_EMERG "Power down.\n"); | 379 | printk(KERN_EMERG "Power down.\n"); |
359 | kmsg_dump(KMSG_DUMP_POWEROFF); | 380 | kmsg_dump(KMSG_DUMP_POWEROFF); |
360 | machine_power_off(); | 381 | machine_power_off(); |
@@ -502,7 +523,7 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) | |||
502 | if (rgid != (gid_t) -1) { | 523 | if (rgid != (gid_t) -1) { |
503 | if (old->gid == rgid || | 524 | if (old->gid == rgid || |
504 | old->egid == rgid || | 525 | old->egid == rgid || |
505 | capable(CAP_SETGID)) | 526 | nsown_capable(CAP_SETGID)) |
506 | new->gid = rgid; | 527 | new->gid = rgid; |
507 | else | 528 | else |
508 | goto error; | 529 | goto error; |
@@ -511,7 +532,7 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) | |||
511 | if (old->gid == egid || | 532 | if (old->gid == egid || |
512 | old->egid == egid || | 533 | old->egid == egid || |
513 | old->sgid == egid || | 534 | old->sgid == egid || |
514 | capable(CAP_SETGID)) | 535 | nsown_capable(CAP_SETGID)) |
515 | new->egid = egid; | 536 | new->egid = egid; |
516 | else | 537 | else |
517 | goto error; | 538 | goto error; |
@@ -546,7 +567,7 @@ SYSCALL_DEFINE1(setgid, gid_t, gid) | |||
546 | old = current_cred(); | 567 | old = current_cred(); |
547 | 568 | ||
548 | retval = -EPERM; | 569 | retval = -EPERM; |
549 | if (capable(CAP_SETGID)) | 570 | if (nsown_capable(CAP_SETGID)) |
550 | new->gid = new->egid = new->sgid = new->fsgid = gid; | 571 | new->gid = new->egid = new->sgid = new->fsgid = gid; |
551 | else if (gid == old->gid || gid == old->sgid) | 572 | else if (gid == old->gid || gid == old->sgid) |
552 | new->egid = new->fsgid = gid; | 573 | new->egid = new->fsgid = gid; |
@@ -613,7 +634,7 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) | |||
613 | new->uid = ruid; | 634 | new->uid = ruid; |
614 | if (old->uid != ruid && | 635 | if (old->uid != ruid && |
615 | old->euid != ruid && | 636 | old->euid != ruid && |
616 | !capable(CAP_SETUID)) | 637 | !nsown_capable(CAP_SETUID)) |
617 | goto error; | 638 | goto error; |
618 | } | 639 | } |
619 | 640 | ||
@@ -622,7 +643,7 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) | |||
622 | if (old->uid != euid && | 643 | if (old->uid != euid && |
623 | old->euid != euid && | 644 | old->euid != euid && |
624 | old->suid != euid && | 645 | old->suid != euid && |
625 | !capable(CAP_SETUID)) | 646 | !nsown_capable(CAP_SETUID)) |
626 | goto error; | 647 | goto error; |
627 | } | 648 | } |
628 | 649 | ||
@@ -670,7 +691,7 @@ SYSCALL_DEFINE1(setuid, uid_t, uid) | |||
670 | old = current_cred(); | 691 | old = current_cred(); |
671 | 692 | ||
672 | retval = -EPERM; | 693 | retval = -EPERM; |
673 | if (capable(CAP_SETUID)) { | 694 | if (nsown_capable(CAP_SETUID)) { |
674 | new->suid = new->uid = uid; | 695 | new->suid = new->uid = uid; |
675 | if (uid != old->uid) { | 696 | if (uid != old->uid) { |
676 | retval = set_user(new); | 697 | retval = set_user(new); |
@@ -712,7 +733,7 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid) | |||
712 | old = current_cred(); | 733 | old = current_cred(); |
713 | 734 | ||
714 | retval = -EPERM; | 735 | retval = -EPERM; |
715 | if (!capable(CAP_SETUID)) { | 736 | if (!nsown_capable(CAP_SETUID)) { |
716 | if (ruid != (uid_t) -1 && ruid != old->uid && | 737 | if (ruid != (uid_t) -1 && ruid != old->uid && |
717 | ruid != old->euid && ruid != old->suid) | 738 | ruid != old->euid && ruid != old->suid) |
718 | goto error; | 739 | goto error; |
@@ -776,7 +797,7 @@ SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid) | |||
776 | old = current_cred(); | 797 | old = current_cred(); |
777 | 798 | ||
778 | retval = -EPERM; | 799 | retval = -EPERM; |
779 | if (!capable(CAP_SETGID)) { | 800 | if (!nsown_capable(CAP_SETGID)) { |
780 | if (rgid != (gid_t) -1 && rgid != old->gid && | 801 | if (rgid != (gid_t) -1 && rgid != old->gid && |
781 | rgid != old->egid && rgid != old->sgid) | 802 | rgid != old->egid && rgid != old->sgid) |
782 | goto error; | 803 | goto error; |
@@ -836,7 +857,7 @@ SYSCALL_DEFINE1(setfsuid, uid_t, uid) | |||
836 | 857 | ||
837 | if (uid == old->uid || uid == old->euid || | 858 | if (uid == old->uid || uid == old->euid || |
838 | uid == old->suid || uid == old->fsuid || | 859 | uid == old->suid || uid == old->fsuid || |
839 | capable(CAP_SETUID)) { | 860 | nsown_capable(CAP_SETUID)) { |
840 | if (uid != old_fsuid) { | 861 | if (uid != old_fsuid) { |
841 | new->fsuid = uid; | 862 | new->fsuid = uid; |
842 | if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) | 863 | if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) |
@@ -869,7 +890,7 @@ SYSCALL_DEFINE1(setfsgid, gid_t, gid) | |||
869 | 890 | ||
870 | if (gid == old->gid || gid == old->egid || | 891 | if (gid == old->gid || gid == old->egid || |
871 | gid == old->sgid || gid == old->fsgid || | 892 | gid == old->sgid || gid == old->fsgid || |
872 | capable(CAP_SETGID)) { | 893 | nsown_capable(CAP_SETGID)) { |
873 | if (gid != old_fsgid) { | 894 | if (gid != old_fsgid) { |
874 | new->fsgid = gid; | 895 | new->fsgid = gid; |
875 | goto change_okay; | 896 | goto change_okay; |
@@ -1177,8 +1198,9 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) | |||
1177 | int errno; | 1198 | int errno; |
1178 | char tmp[__NEW_UTS_LEN]; | 1199 | char tmp[__NEW_UTS_LEN]; |
1179 | 1200 | ||
1180 | if (!capable(CAP_SYS_ADMIN)) | 1201 | if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN)) |
1181 | return -EPERM; | 1202 | return -EPERM; |
1203 | |||
1182 | if (len < 0 || len > __NEW_UTS_LEN) | 1204 | if (len < 0 || len > __NEW_UTS_LEN) |
1183 | return -EINVAL; | 1205 | return -EINVAL; |
1184 | down_write(&uts_sem); | 1206 | down_write(&uts_sem); |
@@ -1226,7 +1248,7 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len) | |||
1226 | int errno; | 1248 | int errno; |
1227 | char tmp[__NEW_UTS_LEN]; | 1249 | char tmp[__NEW_UTS_LEN]; |
1228 | 1250 | ||
1229 | if (!capable(CAP_SYS_ADMIN)) | 1251 | if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN)) |
1230 | return -EPERM; | 1252 | return -EPERM; |
1231 | if (len < 0 || len > __NEW_UTS_LEN) | 1253 | if (len < 0 || len > __NEW_UTS_LEN) |
1232 | return -EINVAL; | 1254 | return -EINVAL; |
@@ -1341,6 +1363,8 @@ int do_prlimit(struct task_struct *tsk, unsigned int resource, | |||
1341 | rlim = tsk->signal->rlim + resource; | 1363 | rlim = tsk->signal->rlim + resource; |
1342 | task_lock(tsk->group_leader); | 1364 | task_lock(tsk->group_leader); |
1343 | if (new_rlim) { | 1365 | if (new_rlim) { |
1366 | /* Keep the capable check against init_user_ns until | ||
1367 | cgroups can contain all limits */ | ||
1344 | if (new_rlim->rlim_max > rlim->rlim_max && | 1368 | if (new_rlim->rlim_max > rlim->rlim_max && |
1345 | !capable(CAP_SYS_RESOURCE)) | 1369 | !capable(CAP_SYS_RESOURCE)) |
1346 | retval = -EPERM; | 1370 | retval = -EPERM; |
@@ -1384,19 +1408,22 @@ static int check_prlimit_permission(struct task_struct *task) | |||
1384 | { | 1408 | { |
1385 | const struct cred *cred = current_cred(), *tcred; | 1409 | const struct cred *cred = current_cred(), *tcred; |
1386 | 1410 | ||
1387 | tcred = __task_cred(task); | 1411 | if (current == task) |
1388 | if (current != task && | 1412 | return 0; |
1389 | (cred->uid != tcred->euid || | ||
1390 | cred->uid != tcred->suid || | ||
1391 | cred->uid != tcred->uid || | ||
1392 | cred->gid != tcred->egid || | ||
1393 | cred->gid != tcred->sgid || | ||
1394 | cred->gid != tcred->gid) && | ||
1395 | !capable(CAP_SYS_RESOURCE)) { | ||
1396 | return -EPERM; | ||
1397 | } | ||
1398 | 1413 | ||
1399 | return 0; | 1414 | tcred = __task_cred(task); |
1415 | if (cred->user->user_ns == tcred->user->user_ns && | ||
1416 | (cred->uid == tcred->euid && | ||
1417 | cred->uid == tcred->suid && | ||
1418 | cred->uid == tcred->uid && | ||
1419 | cred->gid == tcred->egid && | ||
1420 | cred->gid == tcred->sgid && | ||
1421 | cred->gid == tcred->gid)) | ||
1422 | return 0; | ||
1423 | if (ns_capable(tcred->user->user_ns, CAP_SYS_RESOURCE)) | ||
1424 | return 0; | ||
1425 | |||
1426 | return -EPERM; | ||
1400 | } | 1427 | } |
1401 | 1428 | ||
1402 | SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, | 1429 | SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, |
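check_prlimit_permission(), kill_ok_by_cred(), and set_one_prio_perm() all converge on the same two-step shape: a same-namespace id match first, then a capability judged in the target's user namespace. A condensed sketch, assuming the caller holds rcu_read_lock(); the exact set of id comparisons varies per call site:

static int may_act_on(struct task_struct *task, int cap)
{
	const struct cred *cred = current_cred();
	const struct cred *tcred = __task_cred(task);	/* needs RCU */

	/* fast path: same user namespace and the ids line up */
	if (cred->user->user_ns == tcred->user->user_ns &&
	    cred->uid == tcred->uid && cred->gid == tcred->gid)
		return 0;
	/* slow path: the capability, scoped to the target's ns */
	if (ns_capable(tcred->user->user_ns, cap))
		return 0;
	return -EPERM;
}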
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 51054fea5d99..c0bb32414b17 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -117,6 +117,7 @@ static int neg_one = -1; | |||
117 | static int zero; | 117 | static int zero; |
118 | static int __maybe_unused one = 1; | 118 | static int __maybe_unused one = 1; |
119 | static int __maybe_unused two = 2; | 119 | static int __maybe_unused two = 2; |
120 | static int __maybe_unused three = 3; | ||
120 | static unsigned long one_ul = 1; | 121 | static unsigned long one_ul = 1; |
121 | static int one_hundred = 100; | 122 | static int one_hundred = 100; |
122 | #ifdef CONFIG_PRINTK | 123 | #ifdef CONFIG_PRINTK |
@@ -169,6 +170,11 @@ static int proc_taint(struct ctl_table *table, int write, | |||
169 | void __user *buffer, size_t *lenp, loff_t *ppos); | 170 | void __user *buffer, size_t *lenp, loff_t *ppos); |
170 | #endif | 171 | #endif |
171 | 172 | ||
173 | #ifdef CONFIG_PRINTK | ||
174 | static int proc_dmesg_restrict(struct ctl_table *table, int write, | ||
175 | void __user *buffer, size_t *lenp, loff_t *ppos); | ||
176 | #endif | ||
177 | |||
172 | #ifdef CONFIG_MAGIC_SYSRQ | 178 | #ifdef CONFIG_MAGIC_SYSRQ |
173 | /* Note: sysrq code uses its own private copy */ | 179 | /* Note: sysrq code uses its own private copy */ |
174 | static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE; | 180 | static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE; |
@@ -706,7 +712,7 @@ static struct ctl_table kern_table[] = { | |||
706 | .data = &kptr_restrict, | 712 | .data = &kptr_restrict, |
707 | .maxlen = sizeof(int), | 713 | .maxlen = sizeof(int), |
708 | .mode = 0644, | 714 | .mode = 0644, |
709 | .proc_handler = proc_dointvec_minmax, | 715 | .proc_handler = proc_dmesg_restrict, |
710 | .extra1 = &zero, | 716 | .extra1 = &zero, |
711 | .extra2 = &two, | 717 | .extra2 = &two, |
712 | }, | 718 | }, |
@@ -971,14 +977,18 @@ static struct ctl_table vm_table[] = { | |||
971 | .data = &sysctl_overcommit_memory, | 977 | .data = &sysctl_overcommit_memory, |
972 | .maxlen = sizeof(sysctl_overcommit_memory), | 978 | .maxlen = sizeof(sysctl_overcommit_memory), |
973 | .mode = 0644, | 979 | .mode = 0644, |
974 | .proc_handler = proc_dointvec, | 980 | .proc_handler = proc_dointvec_minmax, |
981 | .extra1 = &zero, | ||
982 | .extra2 = &two, | ||
975 | }, | 983 | }, |
976 | { | 984 | { |
977 | .procname = "panic_on_oom", | 985 | .procname = "panic_on_oom", |
978 | .data = &sysctl_panic_on_oom, | 986 | .data = &sysctl_panic_on_oom, |
979 | .maxlen = sizeof(sysctl_panic_on_oom), | 987 | .maxlen = sizeof(sysctl_panic_on_oom), |
980 | .mode = 0644, | 988 | .mode = 0644, |
981 | .proc_handler = proc_dointvec, | 989 | .proc_handler = proc_dointvec_minmax, |
990 | .extra1 = &zero, | ||
991 | .extra2 = &two, | ||
982 | }, | 992 | }, |
983 | { | 993 | { |
984 | .procname = "oom_kill_allocating_task", | 994 | .procname = "oom_kill_allocating_task", |
@@ -1006,7 +1016,8 @@ static struct ctl_table vm_table[] = { | |||
1006 | .data = &page_cluster, | 1016 | .data = &page_cluster, |
1007 | .maxlen = sizeof(int), | 1017 | .maxlen = sizeof(int), |
1008 | .mode = 0644, | 1018 | .mode = 0644, |
1009 | .proc_handler = proc_dointvec, | 1019 | .proc_handler = proc_dointvec_minmax, |
1020 | .extra1 = &zero, | ||
1010 | }, | 1021 | }, |
1011 | { | 1022 | { |
1012 | .procname = "dirty_background_ratio", | 1023 | .procname = "dirty_background_ratio", |
@@ -1054,7 +1065,8 @@ static struct ctl_table vm_table[] = { | |||
1054 | .data = &dirty_expire_interval, | 1065 | .data = &dirty_expire_interval, |
1055 | .maxlen = sizeof(dirty_expire_interval), | 1066 | .maxlen = sizeof(dirty_expire_interval), |
1056 | .mode = 0644, | 1067 | .mode = 0644, |
1057 | .proc_handler = proc_dointvec, | 1068 | .proc_handler = proc_dointvec_minmax, |
1069 | .extra1 = &zero, | ||
1058 | }, | 1070 | }, |
1059 | { | 1071 | { |
1060 | .procname = "nr_pdflush_threads", | 1072 | .procname = "nr_pdflush_threads", |
@@ -1130,6 +1142,8 @@ static struct ctl_table vm_table[] = { | |||
1130 | .maxlen = sizeof(int), | 1142 | .maxlen = sizeof(int), |
1131 | .mode = 0644, | 1143 | .mode = 0644, |
1132 | .proc_handler = drop_caches_sysctl_handler, | 1144 | .proc_handler = drop_caches_sysctl_handler, |
1145 | .extra1 = &one, | ||
1146 | .extra2 = &three, | ||
1133 | }, | 1147 | }, |
1134 | #ifdef CONFIG_COMPACTION | 1148 | #ifdef CONFIG_COMPACTION |
1135 | { | 1149 | { |
@@ -1683,13 +1697,8 @@ static int test_perm(int mode, int op) | |||
1683 | 1697 | ||
1684 | int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) | 1698 | int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) |
1685 | { | 1699 | { |
1686 | int error; | ||
1687 | int mode; | 1700 | int mode; |
1688 | 1701 | ||
1689 | error = security_sysctl(table, op & (MAY_READ | MAY_WRITE | MAY_EXEC)); | ||
1690 | if (error) | ||
1691 | return error; | ||
1692 | |||
1693 | if (root->permissions) | 1702 | if (root->permissions) |
1694 | mode = root->permissions(root, current->nsproxy, table); | 1703 | mode = root->permissions(root, current->nsproxy, table); |
1695 | else | 1704 | else |
@@ -2390,6 +2399,17 @@ static int proc_taint(struct ctl_table *table, int write, | |||
2390 | return err; | 2399 | return err; |
2391 | } | 2400 | } |
2392 | 2401 | ||
2402 | #ifdef CONFIG_PRINTK | ||
2403 | static int proc_dmesg_restrict(struct ctl_table *table, int write, | ||
2404 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
2405 | { | ||
2406 | if (write && !capable(CAP_SYS_ADMIN)) | ||
2407 | return -EPERM; | ||
2408 | |||
2409 | return proc_dointvec_minmax(table, write, buffer, lenp, ppos); | ||
2410 | } | ||
2411 | #endif | ||
2412 | |||
2393 | struct do_proc_dointvec_minmax_conv_param { | 2413 | struct do_proc_dointvec_minmax_conv_param { |
2394 | int *min; | 2414 | int *min; |
2395 | int *max; | 2415 | int *max; |
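The sysctl hunks above harden several writable ints by switching them from proc_dointvec to proc_dointvec_minmax with explicit bounds, and the new proc_dmesg_restrict() shows the companion pattern of gating writes on a capability before delegating to the minmax handler. A self-contained sketch of the bounded-entry shape; my_knob is a placeholder, zero/two mirror the file-static bounds used in the hunks:

static int zero;		/* shared bounds, as in the hunks above */
static int two = 2;
static int my_knob;		/* hypothetical knob, not from the diff */

static struct ctl_table my_table[] = {
	{
		.procname	= "my_knob",
		.data		= &my_knob,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,	/* clamps writes */
		.extra1		= &zero,		/* minimum       */
		.extra2		= &two,			/* maximum       */
	},
	{ }						/* terminator    */
};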
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c index 10b90d8a03c4..4e4932a7b360 100644 --- a/kernel/sysctl_check.c +++ b/kernel/sysctl_check.c | |||
@@ -111,11 +111,9 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table) | |||
111 | const char *fail = NULL; | 111 | const char *fail = NULL; |
112 | 112 | ||
113 | if (table->parent) { | 113 | if (table->parent) { |
114 | if (table->procname && !table->parent->procname) | 114 | if (!table->parent->procname) |
115 | set_fail(&fail, table, "Parent without procname"); | 115 | set_fail(&fail, table, "Parent without procname"); |
116 | } | 116 | } |
117 | if (!table->procname) | ||
118 | set_fail(&fail, table, "No procname"); | ||
119 | if (table->child) { | 117 | if (table->child) { |
120 | if (table->data) | 118 | if (table->data) |
121 | set_fail(&fail, table, "Directory with data?"); | 119 | set_fail(&fail, table, "Directory with data?"); |
@@ -144,13 +142,9 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table) | |||
144 | set_fail(&fail, table, "No maxlen"); | 142 | set_fail(&fail, table, "No maxlen"); |
145 | } | 143 | } |
146 | #ifdef CONFIG_PROC_SYSCTL | 144 | #ifdef CONFIG_PROC_SYSCTL |
147 | if (table->procname && !table->proc_handler) | 145 | if (!table->proc_handler) |
148 | set_fail(&fail, table, "No proc_handler"); | 146 | set_fail(&fail, table, "No proc_handler"); |
149 | #endif | 147 | #endif |
150 | #if 0 | ||
151 | if (!table->procname && table->proc_handler) | ||
152 | set_fail(&fail, table, "proc_handler without procname"); | ||
153 | #endif | ||
154 | sysctl_check_leaf(namespaces, table, &fail); | 148 | sysctl_check_leaf(namespaces, table, &fail); |
155 | } | 149 | } |
156 | if (table->mode > 0777) | 150 | if (table->mode > 0777) |
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 3971c6b9d58d..9ffea360a778 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c | |||
@@ -685,7 +685,7 @@ static int __init taskstats_init(void) | |||
685 | goto err_cgroup_ops; | 685 | goto err_cgroup_ops; |
686 | 686 | ||
687 | family_registered = 1; | 687 | family_registered = 1; |
688 | printk("registered taskstats version %d\n", TASKSTATS_GENL_VERSION); | 688 | pr_info("registered taskstats version %d\n", TASKSTATS_GENL_VERSION); |
689 | return 0; | 689 | return 0; |
690 | err_cgroup_ops: | 690 | err_cgroup_ops: |
691 | genl_unregister_ops(&family, &taskstats_ops); | 691 | genl_unregister_ops(&family, &taskstats_ops); |
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 14674dce77a6..61d7d59f4a1a 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -275,7 +275,7 @@ config PROFILE_ANNOTATED_BRANCHES | |||
275 | This tracer profiles all the likely and unlikely macros | 275 | This tracer profiles all the likely and unlikely macros |
276 | in the kernel. It will display the results in: | 276 | in the kernel. It will display the results in: |
277 | 277 | ||
278 | /sys/kernel/debug/tracing/profile_annotated_branch | 278 | /sys/kernel/debug/tracing/trace_stat/branch_annotated |
279 | 279 | ||
280 | Note: this will add a significant overhead; only turn this | 280 | Note: this will add a significant overhead; only turn this |
281 | on if you need to profile the system's use of these macros. | 281 | on if you need to profile the system's use of these macros. |
@@ -288,7 +288,7 @@ config PROFILE_ALL_BRANCHES | |||
288 | taken in the kernel is recorded whether it hit or miss. | 288 | taken in the kernel is recorded whether it hit or miss. |
289 | The results will be displayed in: | 289 | The results will be displayed in: |
290 | 290 | ||
291 | /sys/kernel/debug/tracing/profile_branch | 291 | /sys/kernel/debug/tracing/trace_stat/branch_all |
292 | 292 | ||
293 | This option also enables the likely/unlikely profiler. | 293 | This option also enables the likely/unlikely profiler. |
294 | 294 | ||
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index cbafed7d4f38..7aa40f8e182d 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
@@ -703,28 +703,21 @@ void blk_trace_shutdown(struct request_queue *q) | |||
703 | * | 703 | * |
704 | **/ | 704 | **/ |
705 | static void blk_add_trace_rq(struct request_queue *q, struct request *rq, | 705 | static void blk_add_trace_rq(struct request_queue *q, struct request *rq, |
706 | u32 what) | 706 | u32 what) |
707 | { | 707 | { |
708 | struct blk_trace *bt = q->blk_trace; | 708 | struct blk_trace *bt = q->blk_trace; |
709 | int rw = rq->cmd_flags & 0x03; | ||
710 | 709 | ||
711 | if (likely(!bt)) | 710 | if (likely(!bt)) |
712 | return; | 711 | return; |
713 | 712 | ||
714 | if (rq->cmd_flags & REQ_DISCARD) | ||
715 | rw |= REQ_DISCARD; | ||
716 | |||
717 | if (rq->cmd_flags & REQ_SECURE) | ||
718 | rw |= REQ_SECURE; | ||
719 | |||
720 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { | 713 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { |
721 | what |= BLK_TC_ACT(BLK_TC_PC); | 714 | what |= BLK_TC_ACT(BLK_TC_PC); |
722 | __blk_add_trace(bt, 0, blk_rq_bytes(rq), rw, | 715 | __blk_add_trace(bt, 0, blk_rq_bytes(rq), rq->cmd_flags, |
723 | what, rq->errors, rq->cmd_len, rq->cmd); | 716 | what, rq->errors, rq->cmd_len, rq->cmd); |
724 | } else { | 717 | } else { |
725 | what |= BLK_TC_ACT(BLK_TC_FS); | 718 | what |= BLK_TC_ACT(BLK_TC_FS); |
726 | __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rw, | 719 | __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), |
727 | what, rq->errors, 0, NULL); | 720 | rq->cmd_flags, what, rq->errors, 0, NULL); |
728 | } | 721 | } |
729 | } | 722 | } |
730 | 723 | ||
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 888b611897d3..c075f4ea6b94 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -1467,7 +1467,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos) | |||
1467 | return t_hash_next(m, pos); | 1467 | return t_hash_next(m, pos); |
1468 | 1468 | ||
1469 | (*pos)++; | 1469 | (*pos)++; |
1470 | iter->pos = *pos; | 1470 | iter->pos = iter->func_pos = *pos; |
1471 | 1471 | ||
1472 | if (iter->flags & FTRACE_ITER_PRINTALL) | 1472 | if (iter->flags & FTRACE_ITER_PRINTALL) |
1473 | return t_hash_start(m, pos); | 1473 | return t_hash_start(m, pos); |
@@ -1502,7 +1502,6 @@ t_next(struct seq_file *m, void *v, loff_t *pos) | |||
1502 | if (!rec) | 1502 | if (!rec) |
1503 | return t_hash_start(m, pos); | 1503 | return t_hash_start(m, pos); |
1504 | 1504 | ||
1505 | iter->func_pos = *pos; | ||
1506 | iter->func = rec; | 1505 | iter->func = rec; |
1507 | 1506 | ||
1508 | return iter; | 1507 | return iter; |
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index db7b439d23ee..d9c8bcafb120 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -668,7 +668,7 @@ static struct list_head *rb_list_head(struct list_head *list) | |||
668 | * the reader page). But if the next page is a header page, | 668 | * the reader page). But if the next page is a header page, |
669 | * its flags will be non zero. | 669 | * its flags will be non zero. |
670 | */ | 670 | */ |
671 | static int inline | 671 | static inline int |
672 | rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer, | 672 | rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer, |
673 | struct buffer_page *page, struct list_head *list) | 673 | struct buffer_page *page, struct list_head *list) |
674 | { | 674 | { |
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 3249b4f77ef0..8008ddcfbf20 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c | |||
@@ -391,8 +391,8 @@ static int process_ops(struct filter_pred *preds, | |||
391 | struct filter_pred *op, void *rec) | 391 | struct filter_pred *op, void *rec) |
392 | { | 392 | { |
393 | struct filter_pred *pred; | 393 | struct filter_pred *pred; |
394 | int match = 0; | ||
394 | int type; | 395 | int type; |
395 | int match; | ||
396 | int i; | 396 | int i; |
397 | 397 | ||
398 | /* | 398 | /* |
diff --git a/kernel/uid16.c b/kernel/uid16.c index 419209893d87..51c6e89e8619 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c | |||
@@ -189,7 +189,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist) | |||
189 | struct group_info *group_info; | 189 | struct group_info *group_info; |
190 | int retval; | 190 | int retval; |
191 | 191 | ||
192 | if (!capable(CAP_SETGID)) | 192 | if (!nsown_capable(CAP_SETGID)) |
193 | return -EPERM; | 193 | return -EPERM; |
194 | if ((unsigned)gidsetsize > NGROUPS_MAX) | 194 | if ((unsigned)gidsetsize > NGROUPS_MAX) |
195 | return -EINVAL; | 195 | return -EINVAL; |
diff --git a/kernel/user.c b/kernel/user.c index 5c598ca781df..9e03e9c1df8d 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
@@ -17,9 +17,13 @@ | |||
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include <linux/user_namespace.h> | 18 | #include <linux/user_namespace.h> |
19 | 19 | ||
20 | /* | ||
21 | * userns count is 1 for root user, 1 for init_uts_ns, | ||
22 | * and 1 for... ? | ||
23 | */ | ||
20 | struct user_namespace init_user_ns = { | 24 | struct user_namespace init_user_ns = { |
21 | .kref = { | 25 | .kref = { |
22 | .refcount = ATOMIC_INIT(2), | 26 | .refcount = ATOMIC_INIT(3), |
23 | }, | 27 | }, |
24 | .creator = &root_user, | 28 | .creator = &root_user, |
25 | }; | 29 | }; |
@@ -47,7 +51,7 @@ static struct kmem_cache *uid_cachep; | |||
47 | */ | 51 | */ |
48 | static DEFINE_SPINLOCK(uidhash_lock); | 52 | static DEFINE_SPINLOCK(uidhash_lock); |
49 | 53 | ||
50 | /* root_user.__count is 2, 1 for init task cred, 1 for init_user_ns->creator */ | 54 | /* root_user.__count is 2, 1 for init task cred, 1 for init_user_ns->user_ns */ |
51 | struct user_struct root_user = { | 55 | struct user_struct root_user = { |
52 | .__count = ATOMIC_INIT(2), | 56 | .__count = ATOMIC_INIT(2), |
53 | .processes = ATOMIC_INIT(1), | 57 | .processes = ATOMIC_INIT(1), |
diff --git a/kernel/utsname.c b/kernel/utsname.c index 8a82b4b8ea52..44646179eaba 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/utsname.h> | 14 | #include <linux/utsname.h> |
15 | #include <linux/err.h> | 15 | #include <linux/err.h> |
16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
17 | #include <linux/user_namespace.h> | ||
17 | 18 | ||
18 | static struct uts_namespace *create_uts_ns(void) | 19 | static struct uts_namespace *create_uts_ns(void) |
19 | { | 20 | { |
@@ -30,7 +31,8 @@ static struct uts_namespace *create_uts_ns(void) | |||
30 | * @old_ns: namespace to clone | 31 | * @old_ns: namespace to clone |
31 | * Return NULL on error (failure to kmalloc), new ns otherwise | 32 | * Return NULL on error (failure to kmalloc), new ns otherwise |
32 | */ | 33 | */ |
33 | static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns) | 34 | static struct uts_namespace *clone_uts_ns(struct task_struct *tsk, |
35 | struct uts_namespace *old_ns) | ||
34 | { | 36 | { |
35 | struct uts_namespace *ns; | 37 | struct uts_namespace *ns; |
36 | 38 | ||
@@ -40,6 +42,7 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns) | |||
40 | 42 | ||
41 | down_read(&uts_sem); | 43 | down_read(&uts_sem); |
42 | memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); | 44 | memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); |
45 | ns->user_ns = get_user_ns(task_cred_xxx(tsk, user)->user_ns); | ||
43 | up_read(&uts_sem); | 46 | up_read(&uts_sem); |
44 | return ns; | 47 | return ns; |
45 | } | 48 | } |
@@ -50,8 +53,10 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns) | |||
50 | * utsname of this process won't be seen by parent, and vice | 53 | * utsname of this process won't be seen by parent, and vice |
51 | * versa. | 54 | * versa. |
52 | */ | 55 | */ |
53 | struct uts_namespace *copy_utsname(unsigned long flags, struct uts_namespace *old_ns) | 56 | struct uts_namespace *copy_utsname(unsigned long flags, |
57 | struct task_struct *tsk) | ||
54 | { | 58 | { |
59 | struct uts_namespace *old_ns = tsk->nsproxy->uts_ns; | ||
55 | struct uts_namespace *new_ns; | 60 | struct uts_namespace *new_ns; |
56 | 61 | ||
57 | BUG_ON(!old_ns); | 62 | BUG_ON(!old_ns); |
@@ -60,7 +65,7 @@ struct uts_namespace *copy_utsname(unsigned long flags, struct uts_namespace *ol | |||
60 | if (!(flags & CLONE_NEWUTS)) | 65 | if (!(flags & CLONE_NEWUTS)) |
61 | return old_ns; | 66 | return old_ns; |
62 | 67 | ||
63 | new_ns = clone_uts_ns(old_ns); | 68 | new_ns = clone_uts_ns(tsk, old_ns); |
64 | 69 | ||
65 | put_uts_ns(old_ns); | 70 | put_uts_ns(old_ns); |
66 | return new_ns; | 71 | return new_ns; |
@@ -71,5 +76,6 @@ void free_uts_ns(struct kref *kref) | |||
71 | struct uts_namespace *ns; | 76 | struct uts_namespace *ns; |
72 | 77 | ||
73 | ns = container_of(kref, struct uts_namespace, kref); | 78 | ns = container_of(kref, struct uts_namespace, kref); |
79 | put_user_ns(ns->user_ns); | ||
74 | kfree(ns); | 80 | kfree(ns); |
75 | } | 81 | } |
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 18bb15776c57..140dce750450 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -48,12 +48,15 @@ static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); | |||
48 | * Should we panic when a soft-lockup or hard-lockup occurs: | 48 | * Should we panic when a soft-lockup or hard-lockup occurs: |
49 | */ | 49 | */ |
50 | #ifdef CONFIG_HARDLOCKUP_DETECTOR | 50 | #ifdef CONFIG_HARDLOCKUP_DETECTOR |
51 | static int hardlockup_panic; | 51 | static int hardlockup_panic = |
52 | CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; | ||
52 | 53 | ||
53 | static int __init hardlockup_panic_setup(char *str) | 54 | static int __init hardlockup_panic_setup(char *str) |
54 | { | 55 | { |
55 | if (!strncmp(str, "panic", 5)) | 56 | if (!strncmp(str, "panic", 5)) |
56 | hardlockup_panic = 1; | 57 | hardlockup_panic = 1; |
58 | else if (!strncmp(str, "nopanic", 7)) | ||
59 | hardlockup_panic = 0; | ||
57 | else if (!strncmp(str, "0", 1)) | 60 | else if (!strncmp(str, "0", 1)) |
58 | watchdog_enabled = 0; | 61 | watchdog_enabled = 0; |
59 | return 1; | 62 | return 1; |
@@ -415,19 +418,22 @@ static int watchdog_prepare_cpu(int cpu) | |||
415 | static int watchdog_enable(int cpu) | 418 | static int watchdog_enable(int cpu) |
416 | { | 419 | { |
417 | struct task_struct *p = per_cpu(softlockup_watchdog, cpu); | 420 | struct task_struct *p = per_cpu(softlockup_watchdog, cpu); |
418 | int err; | 421 | int err = 0; |
419 | 422 | ||
420 | /* enable the perf event */ | 423 | /* enable the perf event */ |
421 | err = watchdog_nmi_enable(cpu); | 424 | err = watchdog_nmi_enable(cpu); |
422 | if (err) | 425 | |
423 | return err; | 426 | /* Regardless of err above, fall through and start softlockup */ |
424 | 427 | ||
425 | /* create the watchdog thread */ | 428 | /* create the watchdog thread */ |
426 | if (!p) { | 429 | if (!p) { |
427 | p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu); | 430 | p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu); |
428 | if (IS_ERR(p)) { | 431 | if (IS_ERR(p)) { |
429 | printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); | 432 | printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); |
430 | return PTR_ERR(p); | 433 | if (!err) |
434 | /* if hardlockup hasn't already set this */ | ||
435 | err = PTR_ERR(p); | ||
436 | goto out; | ||
431 | } | 437 | } |
432 | kthread_bind(p, cpu); | 438 | kthread_bind(p, cpu); |
433 | per_cpu(watchdog_touch_ts, cpu) = 0; | 439 | per_cpu(watchdog_touch_ts, cpu) = 0; |
@@ -435,7 +441,8 @@ static int watchdog_enable(int cpu) | |||
435 | wake_up_process(p); | 441 | wake_up_process(p); |
436 | } | 442 | } |
437 | 443 | ||
438 | return 0; | 444 | out: |
445 | return err; | ||
439 | } | 446 | } |
440 | 447 | ||
441 | static void watchdog_disable(int cpu) | 448 | static void watchdog_disable(int cpu) |
@@ -547,7 +554,13 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
547 | break; | 554 | break; |
548 | #endif /* CONFIG_HOTPLUG_CPU */ | 555 | #endif /* CONFIG_HOTPLUG_CPU */ |
549 | } | 556 | } |
550 | return notifier_from_errno(err); | 557 | |
558 | /* | ||
559 | * hardlockup and softlockup are not important enough | ||
560 | * to block cpu bring up. Just always succeed and | ||
561 | * rely on printk output to flag problems. | ||
562 | */ | ||
563 | return NOTIFY_OK; | ||
551 | } | 564 | } |
552 | 565 | ||
553 | static struct notifier_block __cpuinitdata cpu_nfb = { | 566 | static struct notifier_block __cpuinitdata cpu_nfb = { |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b5fe4c00eb3c..04ef830690ec 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -251,10 +251,12 @@ struct workqueue_struct *system_wq __read_mostly; | |||
251 | struct workqueue_struct *system_long_wq __read_mostly; | 251 | struct workqueue_struct *system_long_wq __read_mostly; |
252 | struct workqueue_struct *system_nrt_wq __read_mostly; | 252 | struct workqueue_struct *system_nrt_wq __read_mostly; |
253 | struct workqueue_struct *system_unbound_wq __read_mostly; | 253 | struct workqueue_struct *system_unbound_wq __read_mostly; |
254 | struct workqueue_struct *system_freezable_wq __read_mostly; | ||
254 | EXPORT_SYMBOL_GPL(system_wq); | 255 | EXPORT_SYMBOL_GPL(system_wq); |
255 | EXPORT_SYMBOL_GPL(system_long_wq); | 256 | EXPORT_SYMBOL_GPL(system_long_wq); |
256 | EXPORT_SYMBOL_GPL(system_nrt_wq); | 257 | EXPORT_SYMBOL_GPL(system_nrt_wq); |
257 | EXPORT_SYMBOL_GPL(system_unbound_wq); | 258 | EXPORT_SYMBOL_GPL(system_unbound_wq); |
259 | EXPORT_SYMBOL_GPL(system_freezable_wq); | ||
258 | 260 | ||
259 | #define CREATE_TRACE_POINTS | 261 | #define CREATE_TRACE_POINTS |
260 | #include <trace/events/workqueue.h> | 262 | #include <trace/events/workqueue.h> |
@@ -1364,8 +1366,10 @@ static struct worker *create_worker(struct global_cwq *gcwq, bool bind) | |||
1364 | worker->id = id; | 1366 | worker->id = id; |
1365 | 1367 | ||
1366 | if (!on_unbound_cpu) | 1368 | if (!on_unbound_cpu) |
1367 | worker->task = kthread_create(worker_thread, worker, | 1369 | worker->task = kthread_create_on_node(worker_thread, |
1368 | "kworker/%u:%d", gcwq->cpu, id); | 1370 | worker, |
1371 | cpu_to_node(gcwq->cpu), | ||
1372 | "kworker/%u:%d", gcwq->cpu, id); | ||
1369 | else | 1373 | else |
1370 | worker->task = kthread_create(worker_thread, worker, | 1374 | worker->task = kthread_create(worker_thread, worker, |
1371 | "kworker/u:%d", id); | 1375 | "kworker/u:%d", id); |
@@ -3781,8 +3785,10 @@ static int __init init_workqueues(void) | |||
3781 | system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0); | 3785 | system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0); |
3782 | system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, | 3786 | system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, |
3783 | WQ_UNBOUND_MAX_ACTIVE); | 3787 | WQ_UNBOUND_MAX_ACTIVE); |
3788 | system_freezable_wq = alloc_workqueue("events_freezable", | ||
3789 | WQ_FREEZABLE, 0); | ||
3784 | BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq || | 3790 | BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq || |
3785 | !system_unbound_wq); | 3791 | !system_unbound_wq || !system_freezable_wq); |
3786 | return 0; | 3792 | return 0; |
3787 | } | 3793 | } |
3788 | early_initcall(init_workqueues); | 3794 | early_initcall(init_workqueues); |
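The workqueue hunks add system_freezable_wq: work queued there is held off while tasks are frozen for suspend or hibernation, so it cannot touch half-suspended devices. A minimal usage sketch; my_work_fn and my_driver_event are placeholders, not from the diff:

static void my_work_fn(struct work_struct *work)
{
	/* never runs while tasks are frozen for suspend/hibernate */
}

static DECLARE_WORK(my_work, my_work_fn);

static int my_driver_event(void)
{
	/* if a freeze is in progress, this work simply waits for thaw */
	return queue_work(system_freezable_wq, &my_work);
}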