Diffstat (limited to 'kernel')
 kernel/auditsc.c              |   5
 kernel/events/callchain.c     |   2
 kernel/events/core.c          | 125
 kernel/events/hw_breakpoint.c |   4
 kernel/exit.c                 |  16
 kernel/fork.c                 |  87
 kernel/hung_task.c            |  11
 kernel/irq/autoprobe.c        |   4
 kernel/irq/chip.c             |  42
 kernel/irq/internals.h        |   2
 kernel/irq/manage.c           |  46
 kernel/kprobes.c              |  20
 kernel/params.c               |   3
 kernel/pid.c                  |   4
 kernel/power/power.h          |  24
 kernel/power/process.c        |  26
 kernel/power/snapshot.c       |   3
 kernel/power/user.c           |  15
 kernel/printk.c               |   6
 kernel/rcutorture.c           |   8
 kernel/relay.c                |  10
 kernel/res_counter.c          |  25
 kernel/sched/core.c           |  20
 kernel/sched/cpupri.c         |   3
 kernel/sched/fair.c           |  36
 kernel/sched/rt.c             |   5
 kernel/watchdog.c             |   2
 27 files changed, 414 insertions(+), 140 deletions(-)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index caaea6e944f8..af1de0f34eae 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1863,11 +1863,12 @@ void __audit_syscall_entry(int arch, int major,
 
 /**
  * audit_syscall_exit - deallocate audit context after a system call
- * @pt_regs: syscall registers
+ * @success: success value of the syscall
+ * @return_code: return value of the syscall
  *
  * Tear down after system call. If the audit context has been marked as
  * auditable (either because of the AUDIT_RECORD_CONTEXT state from
- * filtering, or because some other part of the kernel write an audit
+ * filtering, or because some other part of the kernel wrote an audit
  * message), then write out the syscall information. In call cases,
  * free the names stored from getname().
  */
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index 057e24b665cf..6581a040f399 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -115,8 +115,6 @@ int get_callchain_buffers(void)
 	}
 
 	err = alloc_callchain_buffers();
-	if (err)
-		release_callchain_buffers();
 exit:
 	mutex_unlock(&callchain_mutex);
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a8f4ac001a00..1b5c081d8b9f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -815,7 +815,7 @@ static void update_event_times(struct perf_event *event)
 	 * here.
 	 */
 	if (is_cgroup_event(event))
-		run_end = perf_event_time(event);
+		run_end = perf_cgroup_event_time(event);
 	else if (ctx->is_active)
 		run_end = ctx->time;
 	else
@@ -2300,7 +2300,10 @@ do { \
 	return div64_u64(dividend, divisor);
 }
 
-static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
+static DEFINE_PER_CPU(int, perf_throttled_count);
+static DEFINE_PER_CPU(u64, perf_throttled_seq);
+
+static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count, bool disable)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	s64 period, sample_period;
@@ -2319,22 +2322,40 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 	hwc->sample_period = sample_period;
 
 	if (local64_read(&hwc->period_left) > 8*sample_period) {
-		event->pmu->stop(event, PERF_EF_UPDATE);
+		if (disable)
+			event->pmu->stop(event, PERF_EF_UPDATE);
+
 		local64_set(&hwc->period_left, 0);
-		event->pmu->start(event, PERF_EF_RELOAD);
+
+		if (disable)
+			event->pmu->start(event, PERF_EF_RELOAD);
 	}
 }
 
-static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
+/*
+ * combine freq adjustment with unthrottling to avoid two passes over the
+ * events. At the same time, make sure, having freq events does not change
+ * the rate of unthrottling as that would introduce bias.
+ */
+static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx,
+					   int needs_unthr)
 {
 	struct perf_event *event;
 	struct hw_perf_event *hwc;
-	u64 interrupts, now;
+	u64 now, period = TICK_NSEC;
 	s64 delta;
 
-	if (!ctx->nr_freq)
+	/*
+	 * only need to iterate over all events iff:
+	 * - context have events in frequency mode (needs freq adjust)
+	 * - there are events to unthrottle on this cpu
+	 */
+	if (!(ctx->nr_freq || needs_unthr))
 		return;
 
+	raw_spin_lock(&ctx->lock);
+	perf_pmu_disable(ctx->pmu);
+
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
 		if (event->state != PERF_EVENT_STATE_ACTIVE)
 			continue;
@@ -2344,13 +2365,8 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
 
 		hwc = &event->hw;
 
-		interrupts = hwc->interrupts;
-		hwc->interrupts = 0;
-
-		/*
-		 * unthrottle events on the tick
-		 */
-		if (interrupts == MAX_INTERRUPTS) {
+		if (needs_unthr && hwc->interrupts == MAX_INTERRUPTS) {
+			hwc->interrupts = 0;
 			perf_log_throttle(event, 1);
 			event->pmu->start(event, 0);
 		}
@@ -2358,14 +2374,30 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
 		if (!event->attr.freq || !event->attr.sample_freq)
 			continue;
 
-		event->pmu->read(event);
+		/*
+		 * stop the event and update event->count
+		 */
+		event->pmu->stop(event, PERF_EF_UPDATE);
+
 		now = local64_read(&event->count);
 		delta = now - hwc->freq_count_stamp;
 		hwc->freq_count_stamp = now;
 
+		/*
+		 * restart the event
+		 * reload only if value has changed
+		 * we have stopped the event so tell that
+		 * to perf_adjust_period() to avoid stopping it
+		 * twice.
+		 */
 		if (delta > 0)
-			perf_adjust_period(event, period, delta);
+			perf_adjust_period(event, period, delta, false);
+
+		event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0);
 	}
+
+	perf_pmu_enable(ctx->pmu);
+	raw_spin_unlock(&ctx->lock);
 }
 
 /*
@@ -2388,16 +2420,13 @@ static void rotate_ctx(struct perf_event_context *ctx)
  */
 static void perf_rotate_context(struct perf_cpu_context *cpuctx)
 {
-	u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC;
 	struct perf_event_context *ctx = NULL;
-	int rotate = 0, remove = 1, freq = 0;
+	int rotate = 0, remove = 1;
 
 	if (cpuctx->ctx.nr_events) {
 		remove = 0;
 		if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
 			rotate = 1;
-		if (cpuctx->ctx.nr_freq)
-			freq = 1;
 	}
 
 	ctx = cpuctx->task_ctx;
@@ -2405,37 +2434,26 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
 		remove = 0;
 		if (ctx->nr_events != ctx->nr_active)
 			rotate = 1;
-		if (ctx->nr_freq)
-			freq = 1;
 	}
 
-	if (!rotate && !freq)
+	if (!rotate)
 		goto done;
 
 	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
 	perf_pmu_disable(cpuctx->ctx.pmu);
 
-	if (freq) {
-		perf_ctx_adjust_freq(&cpuctx->ctx, interval);
-		if (ctx)
-			perf_ctx_adjust_freq(ctx, interval);
-	}
-
-	if (rotate) {
-		cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
-		if (ctx)
-			ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
+	cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
+	if (ctx)
+		ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
 
 	rotate_ctx(&cpuctx->ctx);
 	if (ctx)
 		rotate_ctx(ctx);
 
 	perf_event_sched_in(cpuctx, ctx, current);
-	}
 
 	perf_pmu_enable(cpuctx->ctx.pmu);
 	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
-
 done:
 	if (remove)
 		list_del_init(&cpuctx->rotation_list);
@@ -2445,10 +2463,22 @@ void perf_event_task_tick(void)
 {
 	struct list_head *head = &__get_cpu_var(rotation_list);
 	struct perf_cpu_context *cpuctx, *tmp;
+	struct perf_event_context *ctx;
+	int throttled;
 
 	WARN_ON(!irqs_disabled());
 
+	__this_cpu_inc(perf_throttled_seq);
+	throttled = __this_cpu_xchg(perf_throttled_count, 0);
+
 	list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) {
+		ctx = &cpuctx->ctx;
+		perf_adjust_freq_unthr_context(ctx, throttled);
+
+		ctx = cpuctx->task_ctx;
+		if (ctx)
+			perf_adjust_freq_unthr_context(ctx, throttled);
+
 		if (cpuctx->jiffies_interval == 1 ||
 				!(jiffies % cpuctx->jiffies_interval))
 			perf_rotate_context(cpuctx);
@@ -4509,6 +4539,7 @@ static int __perf_event_overflow(struct perf_event *event,
 {
 	int events = atomic_read(&event->event_limit);
 	struct hw_perf_event *hwc = &event->hw;
+	u64 seq;
 	int ret = 0;
 
 	/*
@@ -4518,14 +4549,20 @@ static int __perf_event_overflow(struct perf_event *event,
 	if (unlikely(!is_sampling_event(event)))
 		return 0;
 
-	if (unlikely(hwc->interrupts >= max_samples_per_tick)) {
-		if (throttle) {
+	seq = __this_cpu_read(perf_throttled_seq);
+	if (seq != hwc->interrupts_seq) {
+		hwc->interrupts_seq = seq;
+		hwc->interrupts = 1;
+	} else {
+		hwc->interrupts++;
+		if (unlikely(throttle
+			     && hwc->interrupts >= max_samples_per_tick)) {
+			__this_cpu_inc(perf_throttled_count);
 			hwc->interrupts = MAX_INTERRUPTS;
 			perf_log_throttle(event, 0);
 			ret = 1;
 		}
-	} else
-		hwc->interrupts++;
+	}
 
 	if (event->attr.freq) {
 		u64 now = perf_clock();
@@ -4534,7 +4571,7 @@ static int __perf_event_overflow(struct perf_event *event,
 		hwc->freq_time_stamp = now;
 
 		if (delta > 0 && delta < 2*TICK_NSEC)
-			perf_adjust_period(event, delta, hwc->last_period);
+			perf_adjust_period(event, delta, hwc->last_period, true);
 	}
 
 	/*
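For readers unfamiliar with the new throttling scheme above, here is a minimal, self-contained C sketch (illustrative only, plain userspace code, not the kernel implementation) of the lazy per-tick reset the patch relies on: the tick only bumps a sequence number, and each event's interrupt counter resets itself the first time it fires under a new sequence, instead of every counter being cleared on every tick.

#include <stdio.h>

static unsigned long tick_seq = 1;	/* stands in for perf_throttled_seq */

struct counter {
	unsigned long seq;		/* last tick_seq this counter saw */
	unsigned int interrupts;	/* samples taken in the current tick */
};

static void on_tick(void)
{
	tick_seq++;			/* no walk over all counters here */
}

static void on_sample(struct counter *c, unsigned int max_per_tick)
{
	if (c->seq != tick_seq) {	/* first sample in this tick */
		c->seq = tick_seq;
		c->interrupts = 1;
	} else if (++c->interrupts >= max_per_tick) {
		printf("would throttle\n");
	}
}

int main(void)
{
	struct counter c = { 0, 0 };
	int i;

	for (i = 0; i < 5; i++)
		on_sample(&c, 3);	/* 3rd and later samples would throttle */
	on_tick();
	on_sample(&c, 3);		/* counter lazily resets to 1 */
	printf("after tick: interrupts=%u\n", c.interrupts);
	return 0;
}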
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index b7971d6f38bf..ee706ce44aa0 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -651,10 +651,10 @@ int __init init_hw_breakpoint(void)
 
 err_alloc:
 	for_each_possible_cpu(err_cpu) {
-		if (err_cpu == cpu)
-			break;
 		for (i = 0; i < TYPE_MAX; i++)
 			kfree(per_cpu(nr_task_bp_pinned[i], cpu));
+		if (err_cpu == cpu)
+			break;
 	}
 
 	return -ENOMEM;
diff --git a/kernel/exit.c b/kernel/exit.c
index 294b1709170d..4b4042f9bc6a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1038,6 +1038,22 @@ void do_exit(long code)
 	if (tsk->nr_dirtied)
 		__this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
 	exit_rcu();
+
+	/*
+	 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
+	 * when the following two conditions become true.
+	 *   - There is race condition of mmap_sem (It is acquired by
+	 *     exit_mm()), and
+	 *   - SMI occurs before setting TASK_RUNINNG.
+	 *     (or hypervisor of virtual machine switches to other guest)
+	 *  As a result, we may become TASK_RUNNING after becoming TASK_DEAD
+	 *
+	 * To avoid it, we have to wait for releasing tsk->pi_lock which
+	 * is held by try_to_wake_up()
+	 */
+	smp_mb();
+	raw_spin_unlock_wait(&tsk->pi_lock);
+
 	/* causes final put_task_struct in finish_task_switch(). */
 	tsk->state = TASK_DEAD;
 	tsk->flags |= PF_NOFREEZE;	/* tell freezer to ignore us */
diff --git a/kernel/fork.c b/kernel/fork.c
index 051f090d40c1..26a7a6707fa7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -66,6 +66,7 @@
 #include <linux/user-return-notifier.h>
 #include <linux/oom.h>
 #include <linux/khugepaged.h>
+#include <linux/signalfd.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -647,6 +648,58 @@ struct mm_struct *get_task_mm(struct task_struct *task)
 }
 EXPORT_SYMBOL_GPL(get_task_mm);
 
+struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
+{
+	struct mm_struct *mm;
+	int err;
+
+	err = mutex_lock_killable(&task->signal->cred_guard_mutex);
+	if (err)
+		return ERR_PTR(err);
+
+	mm = get_task_mm(task);
+	if (mm && mm != current->mm &&
+			!ptrace_may_access(task, mode)) {
+		mmput(mm);
+		mm = ERR_PTR(-EACCES);
+	}
+	mutex_unlock(&task->signal->cred_guard_mutex);
+
+	return mm;
+}
+
+static void complete_vfork_done(struct task_struct *tsk)
+{
+	struct completion *vfork;
+
+	task_lock(tsk);
+	vfork = tsk->vfork_done;
+	if (likely(vfork)) {
+		tsk->vfork_done = NULL;
+		complete(vfork);
+	}
+	task_unlock(tsk);
+}
+
+static int wait_for_vfork_done(struct task_struct *child,
+				struct completion *vfork)
+{
+	int killed;
+
+	freezer_do_not_count();
+	killed = wait_for_completion_killable(vfork);
+	freezer_count();
+
+	if (killed) {
+		task_lock(child);
+		child->vfork_done = NULL;
+		task_unlock(child);
+	}
+
+	put_task_struct(child);
+	return killed;
+}
+
 /* Please note the differences between mmput and mm_release.
  * mmput is called whenever we stop holding onto a mm_struct,
  * error success whatever.
@@ -662,8 +715,6 @@ EXPORT_SYMBOL_GPL(get_task_mm);
  */
 void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 {
-	struct completion *vfork_done = tsk->vfork_done;
-
 	/* Get rid of any futexes when releasing the mm */
 #ifdef CONFIG_FUTEX
 	if (unlikely(tsk->robust_list)) {
@@ -683,17 +734,15 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 	/* Get rid of any cached register state */
 	deactivate_mm(tsk, mm);
 
-	/* notify parent sleeping on vfork() */
-	if (vfork_done) {
-		tsk->vfork_done = NULL;
-		complete(vfork_done);
-	}
+	if (tsk->vfork_done)
+		complete_vfork_done(tsk);
 
 	/*
 	 * If we're exiting normally, clear a user-space tid field if
 	 * requested. We leave this alone when dying by signal, to leave
 	 * the value intact in a core dump, and to save the unnecessary
-	 * trouble otherwise. Userland only wants this done for a sys_exit.
+	 * trouble, say, a killed vfork parent shouldn't touch this mm.
+	 * Userland only wants this done for a sys_exit.
 	 */
 	if (tsk->clear_child_tid) {
 		if (!(tsk->flags & PF_SIGNALED) &&
@@ -890,7 +939,7 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
 			return -ENOMEM;
 
 		new_ioc->ioprio = ioc->ioprio;
-		put_io_context(new_ioc, NULL);
+		put_io_context(new_ioc);
 	}
 #endif
 	return 0;
@@ -915,8 +964,10 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
 
 void __cleanup_sighand(struct sighand_struct *sighand)
 {
-	if (atomic_dec_and_test(&sighand->count))
+	if (atomic_dec_and_test(&sighand->count)) {
+		signalfd_cleanup(sighand);
 		kmem_cache_free(sighand_cachep, sighand);
+	}
 }
 
 
@@ -995,7 +1046,6 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 
 	new_flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
 	new_flags |= PF_FORKNOEXEC;
-	new_flags |= PF_STARTING;
 	p->flags = new_flags;
 }
 
@@ -1525,16 +1575,9 @@ long do_fork(unsigned long clone_flags,
 		if (clone_flags & CLONE_VFORK) {
 			p->vfork_done = &vfork;
 			init_completion(&vfork);
+			get_task_struct(p);
 		}
 
-		/*
-		 * We set PF_STARTING at creation in case tracing wants to
-		 * use this to distinguish a fully live task from one that
-		 * hasn't finished SIGSTOP raising yet. Now we clear it
-		 * and set the child going.
-		 */
-		p->flags &= ~PF_STARTING;
-
 		wake_up_new_task(p);
 
 		/* forking complete and child started to run, tell ptracer */
@@ -1542,10 +1585,8 @@ long do_fork(unsigned long clone_flags,
 			ptrace_event(trace, nr);
 
 		if (clone_flags & CLONE_VFORK) {
-			freezer_do_not_count();
-			wait_for_completion(&vfork);
-			freezer_count();
-			ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
+			if (!wait_for_vfork_done(p, &vfork))
+				ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
 		}
 	} else {
 		nr = PTR_ERR(p);
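For context on the new mm_access() helper introduced above, a hypothetical in-kernel caller might look like the sketch below (illustrative only; the function name dump_task_mm_size and its use are not part of this patch). mm_access() returns NULL when the task has no mm, an ERR_PTR() on permission or interruption failures, or a referenced mm that the caller must drop with mmput().

static int dump_task_mm_size(struct task_struct *task)
{
	struct mm_struct *mm;

	mm = mm_access(task, PTRACE_MODE_READ);
	if (IS_ERR(mm))
		return PTR_ERR(mm);	/* e.g. -EINTR or -EACCES */
	if (!mm)
		return -ENOENT;		/* kernel thread or exited task */

	pr_info("%s: %lu pages mapped\n", task->comm, mm->total_vm);
	mmput(mm);
	return 0;
}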
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 2e48ec0c2e91..c21449f85a2a 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -119,15 +119,20 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
  * For preemptible RCU it is sufficient to call rcu_read_unlock in order
  * to exit the grace period. For classic RCU, a reschedule is required.
  */
-static void rcu_lock_break(struct task_struct *g, struct task_struct *t)
+static bool rcu_lock_break(struct task_struct *g, struct task_struct *t)
 {
+	bool can_cont;
+
 	get_task_struct(g);
 	get_task_struct(t);
 	rcu_read_unlock();
 	cond_resched();
 	rcu_read_lock();
+	can_cont = pid_alive(g) && pid_alive(t);
 	put_task_struct(t);
 	put_task_struct(g);
+
+	return can_cont;
 }
 
 /*
@@ -154,9 +159,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
 			goto unlock;
 		if (!--batch_count) {
 			batch_count = HUNG_TASK_BATCHING;
-			rcu_lock_break(g, t);
-			/* Exit if t or g was unhashed during refresh. */
-			if (t->state == TASK_DEAD || g->state == TASK_DEAD)
+			if (!rcu_lock_break(g, t))
 				goto unlock;
 		}
 		/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 342d8f44e401..0119b9d467ae 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -53,7 +53,7 @@ unsigned long probe_irq_on(void)
 			if (desc->irq_data.chip->irq_set_type)
 				desc->irq_data.chip->irq_set_type(&desc->irq_data,
 							 IRQ_TYPE_PROBE);
-			irq_startup(desc);
+			irq_startup(desc, false);
 		}
 		raw_spin_unlock_irq(&desc->lock);
 	}
@@ -70,7 +70,7 @@ unsigned long probe_irq_on(void)
 		raw_spin_lock_irq(&desc->lock);
 		if (!desc->action && irq_settings_can_probe(desc)) {
 			desc->istate |= IRQS_AUTODETECT | IRQS_WAITING;
-			if (irq_startup(desc))
+			if (irq_startup(desc, false))
 				desc->istate |= IRQS_PENDING;
 		}
 		raw_spin_unlock_irq(&desc->lock);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index f7c543a801d9..fb7db75ee0c8 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -157,19 +157,22 @@ static void irq_state_set_masked(struct irq_desc *desc)
 	irqd_set(&desc->irq_data, IRQD_IRQ_MASKED);
 }
 
-int irq_startup(struct irq_desc *desc)
+int irq_startup(struct irq_desc *desc, bool resend)
 {
+	int ret = 0;
+
 	irq_state_clr_disabled(desc);
 	desc->depth = 0;
 
 	if (desc->irq_data.chip->irq_startup) {
-		int ret = desc->irq_data.chip->irq_startup(&desc->irq_data);
+		ret = desc->irq_data.chip->irq_startup(&desc->irq_data);
 		irq_state_clr_masked(desc);
-		return ret;
+	} else {
+		irq_enable(desc);
 	}
-
-	irq_enable(desc);
-	return 0;
+	if (resend)
+		check_irq_resend(desc, desc->irq_data.irq);
+	return ret;
 }
 
 void irq_shutdown(struct irq_desc *desc)
@@ -330,6 +333,24 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(handle_simple_irq);
 
+/*
+ * Called unconditionally from handle_level_irq() and only for oneshot
+ * interrupts from handle_fasteoi_irq()
+ */
+static void cond_unmask_irq(struct irq_desc *desc)
+{
+	/*
+	 * We need to unmask in the following cases:
+	 * - Standard level irq (IRQF_ONESHOT is not set)
+	 * - Oneshot irq which did not wake the thread (caused by a
+	 *   spurious interrupt or a primary handler handling it
+	 *   completely).
+	 */
+	if (!irqd_irq_disabled(&desc->irq_data) &&
+	    irqd_irq_masked(&desc->irq_data) && !desc->threads_oneshot)
+		unmask_irq(desc);
+}
+
 /**
  * handle_level_irq - Level type irq handler
  * @irq: the interrupt number
@@ -362,8 +383,8 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
 
 	handle_irq_event(desc);
 
-	if (!irqd_irq_disabled(&desc->irq_data) && !(desc->istate & IRQS_ONESHOT))
-		unmask_irq(desc);
+	cond_unmask_irq(desc);
+
 out_unlock:
 	raw_spin_unlock(&desc->lock);
 }
@@ -417,6 +438,9 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
 		preflow_handler(desc);
 	handle_irq_event(desc);
 
+	if (desc->istate & IRQS_ONESHOT)
+		cond_unmask_irq(desc);
+
 out_eoi:
 	desc->irq_data.chip->irq_eoi(&desc->irq_data);
 out_unlock:
@@ -625,7 +649,7 @@ __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 		irq_settings_set_noprobe(desc);
 		irq_settings_set_norequest(desc);
 		irq_settings_set_nothread(desc);
-		irq_startup(desc);
+		irq_startup(desc, true);
 	}
 out:
 	irq_put_desc_busunlock(desc, flags);
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index b7952316016a..40378ff877e7 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -67,7 +67,7 @@ extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
 extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp);
 extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
 
-extern int irq_startup(struct irq_desc *desc);
+extern int irq_startup(struct irq_desc *desc, bool resend);
 extern void irq_shutdown(struct irq_desc *desc);
 extern void irq_enable(struct irq_desc *desc);
 extern void irq_disable(struct irq_desc *desc);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index a9a9dbe49fea..0f0d4704ddd8 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -985,6 +985,11 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 
 	/* add new interrupt at end of irq queue */
 	do {
+		/*
+		 * Or all existing action->thread_mask bits,
+		 * so we can find the next zero bit for this
+		 * new action.
+		 */
 		thread_mask |= old->thread_mask;
 		old_ptr = &old->next;
 		old = *old_ptr;
@@ -993,14 +998,41 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 	}
 
 	/*
-	 * Setup the thread mask for this irqaction. Unlikely to have
-	 * 32 resp 64 irqs sharing one line, but who knows.
+	 * Setup the thread mask for this irqaction for ONESHOT. For
+	 * !ONESHOT irqs the thread mask is 0 so we can avoid a
+	 * conditional in irq_wake_thread().
 	 */
-	if (new->flags & IRQF_ONESHOT && thread_mask == ~0UL) {
-		ret = -EBUSY;
-		goto out_mask;
+	if (new->flags & IRQF_ONESHOT) {
+		/*
+		 * Unlikely to have 32 resp 64 irqs sharing one line,
+		 * but who knows.
+		 */
+		if (thread_mask == ~0UL) {
+			ret = -EBUSY;
+			goto out_mask;
+		}
+		/*
+		 * The thread_mask for the action is or'ed to
+		 * desc->thread_active to indicate that the
+		 * IRQF_ONESHOT thread handler has been woken, but not
+		 * yet finished. The bit is cleared when a thread
+		 * completes. When all threads of a shared interrupt
+		 * line have completed desc->threads_active becomes
+		 * zero and the interrupt line is unmasked. See
+		 * handle.c:irq_wake_thread() for further information.
+		 *
+		 * If no thread is woken by primary (hard irq context)
+		 * interrupt handlers, then desc->threads_active is
+		 * also checked for zero to unmask the irq line in the
+		 * affected hard irq flow handlers
+		 * (handle_[fasteoi|level]_irq).
+		 *
+		 * The new action gets the first zero bit of
+		 * thread_mask assigned. See the loop above which or's
+		 * all existing action->thread_mask bits.
+		 */
+		new->thread_mask = 1 << ffz(thread_mask);
 	}
-	new->thread_mask = 1 << ffz(thread_mask);
 
 	if (!shared) {
 		init_waitqueue_head(&desc->wait_for_threads);
@@ -1027,7 +1059,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 		desc->istate |= IRQS_ONESHOT;
 
 		if (irq_settings_can_autoenable(desc))
-			irq_startup(desc);
+			irq_startup(desc, true);
 		else
 			/* Undo nested disables: */
 			desc->depth = 1;
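As background for the ONESHOT thread_mask assignment above, here is a standalone sketch in plain C (illustrative only, not kernel code; ffz_demo is a stand-in for the kernel's ffz()) of the bit allocation being performed: OR together the masks already taken by actions sharing the line, then hand the new action the first zero bit.

#include <stdio.h>

/* first zero bit, like the kernel's ffz() */
static unsigned int ffz_demo(unsigned long mask)
{
	unsigned int bit = 0;

	while (mask & 1UL) {
		mask >>= 1;
		bit++;
	}
	return bit;
}

int main(void)
{
	unsigned long taken[] = { 1UL << 0, 1UL << 1, 1UL << 3 };
	unsigned long thread_mask = 0;
	int i;

	for (i = 0; i < 3; i++)
		thread_mask |= taken[i];	/* 0b1011 */

	/* the new shared action gets bit 2, the first free slot */
	printf("new->thread_mask = 0x%lx\n", 1UL << ffz_demo(thread_mask));
	return 0;
}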
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 95dd7212e610..c62b8546cc90 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1077,6 +1077,7 @@ void __kprobes kprobe_flush_task(struct task_struct *tk)
 		/* Early boot. kretprobe_table_locks not yet initialized. */
 		return;
 
+	INIT_HLIST_HEAD(&empty_rp);
 	hash = hash_ptr(tk, KPROBE_HASH_BITS);
 	head = &kretprobe_inst_table[hash];
 	kretprobe_table_lock(hash, &flags);
@@ -1085,7 +1086,6 @@ void __kprobes kprobe_flush_task(struct task_struct *tk)
 			recycle_rp_inst(ri, &empty_rp);
 	}
 	kretprobe_table_unlock(hash, &flags);
-	INIT_HLIST_HEAD(&empty_rp);
 	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
 		hlist_del(&ri->hlist);
 		kfree(ri);
@@ -1334,8 +1334,10 @@ int __kprobes register_kprobe(struct kprobe *p)
 	if (!kernel_text_address((unsigned long) p->addr) ||
 	    in_kprobes_functions((unsigned long) p->addr) ||
 	    ftrace_text_reserved(p->addr, p->addr) ||
-	    jump_label_text_reserved(p->addr, p->addr))
-		goto fail_with_jump_label;
+	    jump_label_text_reserved(p->addr, p->addr)) {
+		ret = -EINVAL;
+		goto cannot_probe;
+	}
 
 	/* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
 	p->flags &= KPROBE_FLAG_DISABLED;
@@ -1352,7 +1354,7 @@ int __kprobes register_kprobe(struct kprobe *p)
 		 * its code to prohibit unexpected unloading.
 		 */
 		if (unlikely(!try_module_get(probed_mod)))
-			goto fail_with_jump_label;
+			goto cannot_probe;
 
 		/*
 		 * If the module freed .init.text, we couldn't insert
@@ -1361,7 +1363,7 @@ int __kprobes register_kprobe(struct kprobe *p)
 		if (within_module_init((unsigned long)p->addr, probed_mod) &&
 		    probed_mod->state != MODULE_STATE_COMING) {
 			module_put(probed_mod);
-			goto fail_with_jump_label;
+			goto cannot_probe;
 		}
 		/* ret will be updated by following code */
 	}
@@ -1409,7 +1411,7 @@ out:
 
 	return ret;
 
-fail_with_jump_label:
+cannot_probe:
 	preempt_enable();
 	jump_label_unlock();
 	return ret;
@@ -1673,8 +1675,12 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
 		ri->rp = rp;
 		ri->task = current;
 
-		if (rp->entry_handler && rp->entry_handler(ri, regs))
+		if (rp->entry_handler && rp->entry_handler(ri, regs)) {
+			raw_spin_lock_irqsave(&rp->lock, flags);
+			hlist_add_head(&ri->hlist, &rp->free_instances);
+			raw_spin_unlock_irqrestore(&rp->lock, flags);
 			return 0;
+		}
 
 		arch_prepare_kretprobe(ri, regs);
 
diff --git a/kernel/params.c b/kernel/params.c
index 32ee04308285..4bc965d8a1fe 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -97,7 +97,8 @@ static int parse_one(char *param,
 	for (i = 0; i < num_params; i++) {
 		if (parameq(param, params[i].name)) {
 			/* No one handled NULL, so do it here. */
-			if (!val && params[i].ops->set != param_set_bool)
+			if (!val && params[i].ops->set != param_set_bool
+			    && params[i].ops->set != param_set_bint)
 				return -EINVAL;
 			pr_debug("They are equal! Calling %p\n",
 				 params[i].ops->set);
diff --git a/kernel/pid.c b/kernel/pid.c
index ce8e00deaccb..9f08dfabaf13 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -543,12 +543,12 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
  */
 void __init pidhash_init(void)
 {
-	int i, pidhash_size;
+	unsigned int i, pidhash_size;
 
 	pid_hash = alloc_large_system_hash("PID", sizeof(*pid_hash), 0, 18,
 					   HASH_EARLY | HASH_SMALL,
 					   &pidhash_shift, NULL, 4096);
-	pidhash_size = 1 << pidhash_shift;
+	pidhash_size = 1U << pidhash_shift;
 
 	for (i = 0; i < pidhash_size; i++)
 		INIT_HLIST_HEAD(&pid_hash[i]);
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 0c4defe6d3b8..21724eee5206 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -231,8 +231,28 @@ extern int pm_test_level;
 #ifdef CONFIG_SUSPEND_FREEZER
 static inline int suspend_freeze_processes(void)
 {
-	int error = freeze_processes();
-	return error ? : freeze_kernel_threads();
+	int error;
+
+	error = freeze_processes();
+
+	/*
+	 * freeze_processes() automatically thaws every task if freezing
+	 * fails. So we need not do anything extra upon error.
+	 */
+	if (error)
+		goto Finish;
+
+	error = freeze_kernel_threads();
+
+	/*
+	 * freeze_kernel_threads() thaws only kernel threads upon freezing
+	 * failure. So we have to thaw the userspace tasks ourselves.
+	 */
+	if (error)
+		thaw_processes();
+
+ Finish:
+	return error;
 }
 
 static inline void suspend_thaw_processes(void)
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 77274c9ba2f1..7e426459e60a 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -143,7 +143,10 @@ int freeze_processes(void)
 /**
  * freeze_kernel_threads - Make freezable kernel threads go to the refrigerator.
  *
- * On success, returns 0. On failure, -errno and system is fully thawed.
+ * On success, returns 0. On failure, -errno and only the kernel threads are
+ * thawed, so as to give a chance to the caller to do additional cleanups
+ * (if any) before thawing the userspace tasks. So, it is the responsibility
+ * of the caller to thaw the userspace tasks, when the time is right.
  */
 int freeze_kernel_threads(void)
 {
@@ -159,7 +162,7 @@ int freeze_kernel_threads(void)
 	BUG_ON(in_atomic());
 
 	if (error)
-		thaw_processes();
+		thaw_kernel_threads();
 	return error;
 }
 
@@ -188,3 +191,22 @@ void thaw_processes(void)
 	printk("done.\n");
 }
 
+void thaw_kernel_threads(void)
+{
+	struct task_struct *g, *p;
+
+	pm_nosig_freezing = false;
+	printk("Restarting kernel threads ... ");
+
+	thaw_workqueues();
+
+	read_lock(&tasklist_lock);
+	do_each_thread(g, p) {
+		if (p->flags & (PF_KTHREAD | PF_WQ_WORKER))
+			__thaw_task(p);
+	} while_each_thread(g, p);
+	read_unlock(&tasklist_lock);
+
+	schedule();
+	printk("done.\n");
+}
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 1cf88900ec4f..6a768e537001 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -812,7 +812,8 @@ unsigned int snapshot_additional_pages(struct zone *zone)
 	unsigned int res;
 
 	res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
-	res += DIV_ROUND_UP(res * sizeof(struct bm_block), PAGE_SIZE);
+	res += DIV_ROUND_UP(res * sizeof(struct bm_block),
+			    LINKED_PAGE_DATA_SIZE);
 	return 2 * res;
 }
 
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 6b1ab7a88522..3e100075b13c 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -249,13 +249,15 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 		}
 		pm_restore_gfp_mask();
 		error = hibernation_snapshot(data->platform_support);
-		if (!error) {
+		if (error) {
+			thaw_kernel_threads();
+		} else {
 			error = put_user(in_suspend, (int __user *)arg);
 			if (!error && !freezer_test_done)
 				data->ready = 1;
 			if (freezer_test_done) {
 				freezer_test_done = false;
-				thaw_processes();
+				thaw_kernel_threads();
 			}
 		}
 		break;
@@ -274,6 +276,15 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 		swsusp_free();
 		memset(&data->handle, 0, sizeof(struct snapshot_handle));
 		data->ready = 0;
+		/*
+		 * It is necessary to thaw kernel threads here, because
+		 * SNAPSHOT_CREATE_IMAGE may be invoked directly after
+		 * SNAPSHOT_FREE. In that case, if kernel threads were not
+		 * thawed, the preallocation of memory carried out by
+		 * hibernation_snapshot() might run into problems (i.e. it
+		 * might fail or even deadlock).
+		 */
+		thaw_kernel_threads();
 		break;
 
 	case SNAPSHOT_PREF_IMAGE_SIZE:
diff --git a/kernel/printk.c b/kernel/printk.c
index 13c0a1143f49..32690a0b7a18 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -702,6 +702,9 @@ static bool printk_time = 0;
 #endif
 module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
 
+static bool always_kmsg_dump;
+module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR);
+
 /* Check if we have any console registered that can be called early in boot. */
 static int have_callable_console(void)
 {
@@ -1732,6 +1735,9 @@ void kmsg_dump(enum kmsg_dump_reason reason)
 	unsigned long l1, l2;
 	unsigned long flags;
 
+	if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump)
+		return;
+
 	/* Theoretically, the log could move on after we do this, but
 	   there's not a lot we can do about that. The new messages
 	   will overwrite the start of what we dump. */
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 88f17b8a3b1d..a58ac285fc69 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -56,8 +56,8 @@ static int nreaders = -1;	/* # reader threads, defaults to 2*ncpus */
 static int nfakewriters = 4;	/* # fake writer threads */
 static int stat_interval;	/* Interval between stats, in seconds. */
 				/*  Defaults to "only at end of test". */
-static int verbose;		/* Print more debug info. */
-static int test_no_idle_hz;	/* Test RCU's support for tickless idle CPUs. */
+static bool verbose;		/* Print more debug info. */
+static bool test_no_idle_hz;	/* Test RCU's support for tickless idle CPUs. */
 static int shuffle_interval = 3; /* Interval between shuffles (in sec)*/
 static int stutter = 5;		/* Start/stop testing interval (in sec) */
 static int irqreader = 1;	/* RCU readers from irq (timers). */
@@ -1399,7 +1399,7 @@ rcu_torture_shutdown(void *arg)
  * Execute random CPU-hotplug operations at the interval specified
  * by the onoff_interval.
  */
-static int
+static int __cpuinit
 rcu_torture_onoff(void *arg)
 {
 	int cpu;
@@ -1447,7 +1447,7 @@ rcu_torture_onoff(void *arg)
 	return 0;
 }
 
-static int
+static int __cpuinit
 rcu_torture_onoff_init(void)
 {
 	if (onoff_interval <= 0)
diff --git a/kernel/relay.c b/kernel/relay.c
index 4335e1d7ee2d..ab56a1764d4d 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -164,10 +164,14 @@ depopulate:
  */
 static struct rchan_buf *relay_create_buf(struct rchan *chan)
 {
-	struct rchan_buf *buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL);
-	if (!buf)
+	struct rchan_buf *buf;
+
+	if (chan->n_subbufs > UINT_MAX / sizeof(size_t *))
 		return NULL;
 
+	buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL);
+	if (!buf)
+		return NULL;
 	buf->padding = kmalloc(chan->n_subbufs * sizeof(size_t *), GFP_KERNEL);
 	if (!buf->padding)
 		goto free_buf;
@@ -574,6 +578,8 @@ struct rchan *relay_open(const char *base_filename,
 
 	if (!(subbuf_size && n_subbufs))
 		return NULL;
+	if (subbuf_size > UINT_MAX / n_subbufs)
+		return NULL;
 
 	chan = kzalloc(sizeof(struct rchan), GFP_KERNEL);
 	if (!chan)
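The relay.c hunks above add the classic divide-before-multiply overflow guard. A minimal standalone illustration of that pattern (plain C, illustrative only, not kernel code): reject n * size whenever n > UINT_MAX / size, before the multiplication can wrap.

#include <limits.h>
#include <stdio.h>

static int alloc_would_overflow(unsigned int n_subbufs, unsigned int subbuf_size)
{
	if (subbuf_size && n_subbufs > UINT_MAX / subbuf_size)
		return 1;	/* n_subbufs * subbuf_size would wrap */
	return 0;
}

int main(void)
{
	printf("%d\n", alloc_would_overflow(1U << 16, 1U << 16));	/* 1: wraps */
	printf("%d\n", alloc_would_overflow(8, 4096));			/* 0: fine  */
	return 0;
}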
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index 6d269cce7aa1..d508363858b3 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -66,6 +66,31 @@ done:
 	return ret;
 }
 
+int res_counter_charge_nofail(struct res_counter *counter, unsigned long val,
+			      struct res_counter **limit_fail_at)
+{
+	int ret, r;
+	unsigned long flags;
+	struct res_counter *c;
+
+	r = ret = 0;
+	*limit_fail_at = NULL;
+	local_irq_save(flags);
+	for (c = counter; c != NULL; c = c->parent) {
+		spin_lock(&c->lock);
+		r = res_counter_charge_locked(c, val);
+		if (r)
+			c->usage += val;
+		spin_unlock(&c->lock);
+		if (r < 0 && ret == 0) {
+			*limit_fail_at = c;
+			ret = r;
+		}
+	}
+	local_irq_restore(flags);
+
+	return ret;
+}
 void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
 {
 	if (WARN_ON(counter->usage < val))
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index df00cb09263e..b342f57879e6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -74,6 +74,7 @@
 
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
+#include <asm/mutex.h>
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #endif
@@ -723,9 +724,6 @@ static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
 	p->sched_class->dequeue_task(rq, p, flags);
 }
 
-/*
- * activate_task - move a task to the runqueue.
- */
 void activate_task(struct rq *rq, struct task_struct *p, int flags)
 {
 	if (task_contributes_to_load(p))
@@ -734,9 +732,6 @@ void activate_task(struct rq *rq, struct task_struct *p, int flags)
 	enqueue_task(rq, p, flags);
 }
 
-/*
- * deactivate_task - remove a task from the runqueue.
- */
 void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
 {
 	if (task_contributes_to_load(p))
@@ -1937,7 +1932,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	local_irq_enable();
 #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
 	finish_lock_switch(rq, prev);
-	trace_sched_stat_sleeptime(current, rq->clock);
 
 	fire_sched_in_preempt_notifiers(current);
 	if (mm)
@@ -4134,7 +4128,7 @@ recheck:
 	on_rq = p->on_rq;
 	running = task_current(rq, p);
 	if (on_rq)
-		deactivate_task(rq, p, 0);
+		dequeue_task(rq, p, 0);
 	if (running)
 		p->sched_class->put_prev_task(rq, p);
 
@@ -4147,7 +4141,7 @@ recheck:
 	if (running)
 		p->sched_class->set_curr_task(rq);
 	if (on_rq)
-		activate_task(rq, p, 0);
+		enqueue_task(rq, p, 0);
 
 	check_class_changed(rq, p, prev_class, oldprio);
 	task_rq_unlock(rq, p, &flags);
@@ -4998,9 +4992,9 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 	 * placed properly.
 	 */
 	if (p->on_rq) {
-		deactivate_task(rq_src, p, 0);
+		dequeue_task(rq_src, p, 0);
 		set_task_cpu(p, dest_cpu);
-		activate_task(rq_dest, p, 0);
+		enqueue_task(rq_dest, p, 0);
 		check_preempt_curr(rq_dest, p, 0);
 	}
 done:
@@ -7032,10 +7026,10 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
 
 	on_rq = p->on_rq;
 	if (on_rq)
-		deactivate_task(rq, p, 0);
+		dequeue_task(rq, p, 0);
 	__setscheduler(rq, p, SCHED_NORMAL, 0);
 	if (on_rq) {
-		activate_task(rq, p, 0);
+		enqueue_task(rq, p, 0);
 		resched_task(rq->curr);
 	}
 
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index b0d798eaf130..d72586fdf660 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -129,7 +129,7 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
  * cpupri_set - update the cpu priority setting
  * @cp: The cpupri context
  * @cpu: The target cpu
- * @pri: The priority (INVALID-RT99) to assign to this CPU
+ * @newpri: The priority (INVALID-RT99) to assign to this CPU
  *
  * Note: Assumes cpu_rq(cpu)->lock is locked
  *
@@ -200,7 +200,6 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 /**
  * cpupri_init - initialize the cpupri structure
  * @cp: The cpupri context
- * @bootmem: true if allocations need to use bootmem
  *
  * Returns: -ENOMEM if memory fails.
  */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 84adb2d66cbd..aca16b843b7e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1003,6 +1003,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		if (unlikely(delta > se->statistics.sleep_max))
 			se->statistics.sleep_max = delta;
 
+		se->statistics.sleep_start = 0;
 		se->statistics.sum_sleep_runtime += delta;
 
 		if (tsk) {
@@ -1019,6 +1020,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		if (unlikely(delta > se->statistics.block_max))
 			se->statistics.block_max = delta;
 
+		se->statistics.block_start = 0;
 		se->statistics.sum_sleep_runtime += delta;
 
 		if (tsk) {
@@ -4866,6 +4868,15 @@ static void nohz_balancer_kick(int cpu)
 	return;
 }
 
+static inline void clear_nohz_tick_stopped(int cpu)
+{
+	if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) {
+		cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
+		atomic_dec(&nohz.nr_cpus);
+		clear_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
+	}
+}
+
 static inline void set_cpu_sd_state_busy(void)
 {
 	struct sched_domain *sd;
@@ -4904,6 +4915,12 @@ void select_nohz_load_balancer(int stop_tick)
 {
 	int cpu = smp_processor_id();
 
+	/*
+	 * If this cpu is going down, then nothing needs to be done.
+	 */
+	if (!cpu_active(cpu))
+		return;
+
 	if (stop_tick) {
 		if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))
 			return;
@@ -4914,6 +4931,18 @@ void select_nohz_load_balancer(int stop_tick)
 	}
 	return;
 }
+
+static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb,
+					unsigned long action, void *hcpu)
+{
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_DYING:
+		clear_nohz_tick_stopped(smp_processor_id());
+		return NOTIFY_OK;
+	default:
+		return NOTIFY_DONE;
+	}
+}
 #endif
 
 static DEFINE_SPINLOCK(balancing);
@@ -5070,11 +5099,7 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
 	 * busy tick after returning from idle, we will update the busy stats.
 	 */
 	set_cpu_sd_state_busy();
-	if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) {
-		clear_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
-		cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
-		atomic_dec(&nohz.nr_cpus);
-	}
+	clear_nohz_tick_stopped(cpu);
 
 	/*
 	 * None are in tickless mode and hence no need for NOHZ idle load
@@ -5590,6 +5615,7 @@ __init void init_sched_fair_class(void)
 
 #ifdef CONFIG_NO_HZ
 	zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
+	cpu_notifier(sched_ilb_notifier, 0);
 #endif
 #endif /* SMP */
 
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 3640ebbb466b..f42ae7fb5ec5 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1587,6 +1587,11 @@ static int push_rt_task(struct rq *rq)
 	if (!next_task)
 		return 0;
 
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+	if (unlikely(task_running(rq, next_task)))
+		return 0;
+#endif
+
 retry:
 	if (unlikely(next_task == rq->curr)) {
 		WARN_ON(1);
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 1d7bca7f4f52..d117262deba3 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -296,7 +296,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	if (__this_cpu_read(soft_watchdog_warn) == true)
 		return HRTIMER_RESTART;
 
-	printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
+	printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
 		smp_processor_id(), duration,
 		current->comm, task_pid_nr(current));
 	print_modules();