aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/debug/kdb/kdb_main.c21
-rw-r--r--kernel/exit.c9
-rw-r--r--kernel/futex.c3
-rw-r--r--kernel/futex_compat.c3
-rw-r--r--kernel/hw_breakpoint.c3
-rw-r--r--kernel/irq_work.c4
-rw-r--r--kernel/module.c12
-rw-r--r--kernel/perf_event.c93
-rw-r--r--kernel/pm_qos_params.c4
-rw-r--r--kernel/posix-cpu-timers.c12
-rw-r--r--kernel/power/Kconfig4
-rw-r--r--kernel/power/hibernate.c22
-rw-r--r--kernel/power/suspend.c5
-rw-r--r--kernel/power/swap.c53
-rw-r--r--kernel/power/user.c2
-rw-r--r--kernel/sched.c39
-rw-r--r--kernel/sched_fair.c48
-rw-r--r--kernel/sched_stoptask.c4
-rw-r--r--kernel/sysctl.c2
-rw-r--r--kernel/trace/Kconfig2
-rw-r--r--kernel/trace/trace.c20
21 files changed, 268 insertions, 97 deletions
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 37755d621924..a6e729766821 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -82,7 +82,7 @@ static kdbtab_t kdb_base_commands[50];
82#define for_each_kdbcmd(cmd, num) \ 82#define for_each_kdbcmd(cmd, num) \
83 for ((cmd) = kdb_base_commands, (num) = 0; \ 83 for ((cmd) = kdb_base_commands, (num) = 0; \
84 num < kdb_max_commands; \ 84 num < kdb_max_commands; \
85 num == KDB_BASE_CMD_MAX ? cmd = kdb_commands : cmd++, num++) 85 num++, num == KDB_BASE_CMD_MAX ? cmd = kdb_commands : cmd++)
86 86
87typedef struct _kdbmsg { 87typedef struct _kdbmsg {
88 int km_diag; /* kdb diagnostic */ 88 int km_diag; /* kdb diagnostic */
@@ -646,7 +646,7 @@ static int kdb_defcmd2(const char *cmdstr, const char *argv0)
646 } 646 }
647 if (!s->usable) 647 if (!s->usable)
648 return KDB_NOTIMP; 648 return KDB_NOTIMP;
649 s->command = kmalloc((s->count + 1) * sizeof(*(s->command)), GFP_KDB); 649 s->command = kzalloc((s->count + 1) * sizeof(*(s->command)), GFP_KDB);
650 if (!s->command) { 650 if (!s->command) {
651 kdb_printf("Could not allocate new kdb_defcmd table for %s\n", 651 kdb_printf("Could not allocate new kdb_defcmd table for %s\n",
652 cmdstr); 652 cmdstr);
@@ -2361,7 +2361,7 @@ static int kdb_pid(int argc, const char **argv)
2361 */ 2361 */
2362static int kdb_ll(int argc, const char **argv) 2362static int kdb_ll(int argc, const char **argv)
2363{ 2363{
2364 int diag; 2364 int diag = 0;
2365 unsigned long addr; 2365 unsigned long addr;
2366 long offset = 0; 2366 long offset = 0;
2367 unsigned long va; 2367 unsigned long va;
@@ -2400,20 +2400,21 @@ static int kdb_ll(int argc, const char **argv)
2400 char buf[80]; 2400 char buf[80];
2401 2401
2402 if (KDB_FLAG(CMD_INTERRUPT)) 2402 if (KDB_FLAG(CMD_INTERRUPT))
2403 return 0; 2403 goto out;
2404 2404
2405 sprintf(buf, "%s " kdb_machreg_fmt "\n", command, va); 2405 sprintf(buf, "%s " kdb_machreg_fmt "\n", command, va);
2406 diag = kdb_parse(buf); 2406 diag = kdb_parse(buf);
2407 if (diag) 2407 if (diag)
2408 return diag; 2408 goto out;
2409 2409
2410 addr = va + linkoffset; 2410 addr = va + linkoffset;
2411 if (kdb_getword(&va, addr, sizeof(va))) 2411 if (kdb_getword(&va, addr, sizeof(va)))
2412 return 0; 2412 goto out;
2413 } 2413 }
2414 kfree(command);
2415 2414
2416 return 0; 2415out:
2416 kfree(command);
2417 return diag;
2417} 2418}
2418 2419
2419static int kdb_kgdb(int argc, const char **argv) 2420static int kdb_kgdb(int argc, const char **argv)
@@ -2739,13 +2740,13 @@ int kdb_register_repeat(char *cmd,
2739 } 2740 }
2740 if (kdb_commands) { 2741 if (kdb_commands) {
2741 memcpy(new, kdb_commands, 2742 memcpy(new, kdb_commands,
2742 kdb_max_commands * sizeof(*new)); 2743 (kdb_max_commands - KDB_BASE_CMD_MAX) * sizeof(*new));
2743 kfree(kdb_commands); 2744 kfree(kdb_commands);
2744 } 2745 }
2745 memset(new + kdb_max_commands, 0, 2746 memset(new + kdb_max_commands, 0,
2746 kdb_command_extend * sizeof(*new)); 2747 kdb_command_extend * sizeof(*new));
2747 kdb_commands = new; 2748 kdb_commands = new;
2748 kp = kdb_commands + kdb_max_commands; 2749 kp = kdb_commands + kdb_max_commands - KDB_BASE_CMD_MAX;
2749 kdb_max_commands += kdb_command_extend; 2750 kdb_max_commands += kdb_command_extend;
2750 } 2751 }
2751 2752
diff --git a/kernel/exit.c b/kernel/exit.c
index 21aa7b3001fb..676149a4ac5f 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -914,6 +914,15 @@ NORET_TYPE void do_exit(long code)
914 if (unlikely(!tsk->pid)) 914 if (unlikely(!tsk->pid))
915 panic("Attempted to kill the idle task!"); 915 panic("Attempted to kill the idle task!");
916 916
917 /*
918 * If do_exit is called because this processes oopsed, it's possible
919 * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before
920 * continuing. Amongst other possible reasons, this is to prevent
921 * mm_release()->clear_child_tid() from writing to a user-controlled
922 * kernel address.
923 */
924 set_fs(USER_DS);
925
917 tracehook_report_exit(&code); 926 tracehook_report_exit(&code);
918 927
919 validate_creds_for_do_exit(tsk); 928 validate_creds_for_do_exit(tsk);
diff --git a/kernel/futex.c b/kernel/futex.c
index 6c683b37f2ce..40a8777a27d0 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2489,7 +2489,8 @@ void exit_robust_list(struct task_struct *curr)
2489{ 2489{
2490 struct robust_list_head __user *head = curr->robust_list; 2490 struct robust_list_head __user *head = curr->robust_list;
2491 struct robust_list __user *entry, *next_entry, *pending; 2491 struct robust_list __user *entry, *next_entry, *pending;
2492 unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip; 2492 unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
2493 unsigned int uninitialized_var(next_pi);
2493 unsigned long futex_offset; 2494 unsigned long futex_offset;
2494 int rc; 2495 int rc;
2495 2496
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 06da4dfc339b..a7934ac75e5b 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -49,7 +49,8 @@ void compat_exit_robust_list(struct task_struct *curr)
49{ 49{
50 struct compat_robust_list_head __user *head = curr->compat_robust_list; 50 struct compat_robust_list_head __user *head = curr->compat_robust_list;
51 struct robust_list __user *entry, *next_entry, *pending; 51 struct robust_list __user *entry, *next_entry, *pending;
52 unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip; 52 unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
53 unsigned int uninitialized_var(next_pi);
53 compat_uptr_t uentry, next_uentry, upending; 54 compat_uptr_t uentry, next_uentry, upending;
54 compat_long_t futex_offset; 55 compat_long_t futex_offset;
55 int rc; 56 int rc;
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 2c9120f0afca..e5325825aeb6 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -620,7 +620,7 @@ static struct pmu perf_breakpoint = {
620 .read = hw_breakpoint_pmu_read, 620 .read = hw_breakpoint_pmu_read,
621}; 621};
622 622
623static int __init init_hw_breakpoint(void) 623int __init init_hw_breakpoint(void)
624{ 624{
625 unsigned int **task_bp_pinned; 625 unsigned int **task_bp_pinned;
626 int cpu, err_cpu; 626 int cpu, err_cpu;
@@ -655,6 +655,5 @@ static int __init init_hw_breakpoint(void)
655 655
656 return -ENOMEM; 656 return -ENOMEM;
657} 657}
658core_initcall(init_hw_breakpoint);
659 658
660 659
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index f16763ff8481..90f881904bb1 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -145,7 +145,9 @@ void irq_work_run(void)
145 * Clear the BUSY bit and return to the free state if 145 * Clear the BUSY bit and return to the free state if
146 * no-one else claimed it meanwhile. 146 * no-one else claimed it meanwhile.
147 */ 147 */
148 cmpxchg(&entry->next, next_flags(NULL, IRQ_WORK_BUSY), NULL); 148 (void)cmpxchg(&entry->next,
149 next_flags(NULL, IRQ_WORK_BUSY),
150 NULL);
149 } 151 }
150} 152}
151EXPORT_SYMBOL_GPL(irq_work_run); 153EXPORT_SYMBOL_GPL(irq_work_run);
diff --git a/kernel/module.c b/kernel/module.c
index 437a74a7524a..d190664f25ff 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2326,6 +2326,18 @@ static void find_module_sections(struct module *mod, struct load_info *info)
2326 kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) * 2326 kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) *
2327 mod->num_trace_events, GFP_KERNEL); 2327 mod->num_trace_events, GFP_KERNEL);
2328#endif 2328#endif
2329#ifdef CONFIG_TRACING
2330 mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt",
2331 sizeof(*mod->trace_bprintk_fmt_start),
2332 &mod->num_trace_bprintk_fmt);
2333 /*
2334 * This section contains pointers to allocated objects in the trace
2335 * code and not scanning it leads to false positives.
2336 */
2337 kmemleak_scan_area(mod->trace_bprintk_fmt_start,
2338 sizeof(*mod->trace_bprintk_fmt_start) *
2339 mod->num_trace_bprintk_fmt, GFP_KERNEL);
2340#endif
2329#ifdef CONFIG_FTRACE_MCOUNT_RECORD 2341#ifdef CONFIG_FTRACE_MCOUNT_RECORD
2330 /* sechdrs[0].sh_size is always zero */ 2342 /* sechdrs[0].sh_size is always zero */
2331 mod->ftrace_callsites = section_objs(info, "__mcount_loc", 2343 mod->ftrace_callsites = section_objs(info, "__mcount_loc",
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index cb6c0d2af68f..eac7e3364335 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -31,6 +31,7 @@
31#include <linux/kernel_stat.h> 31#include <linux/kernel_stat.h>
32#include <linux/perf_event.h> 32#include <linux/perf_event.h>
33#include <linux/ftrace_event.h> 33#include <linux/ftrace_event.h>
34#include <linux/hw_breakpoint.h>
34 35
35#include <asm/irq_regs.h> 36#include <asm/irq_regs.h>
36 37
@@ -1286,8 +1287,6 @@ void __perf_event_task_sched_out(struct task_struct *task,
1286{ 1287{
1287 int ctxn; 1288 int ctxn;
1288 1289
1289 perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
1290
1291 for_each_task_context_nr(ctxn) 1290 for_each_task_context_nr(ctxn)
1292 perf_event_context_sched_out(task, ctxn, next); 1291 perf_event_context_sched_out(task, ctxn, next);
1293} 1292}
@@ -1621,8 +1620,12 @@ static void rotate_ctx(struct perf_event_context *ctx)
1621{ 1620{
1622 raw_spin_lock(&ctx->lock); 1621 raw_spin_lock(&ctx->lock);
1623 1622
1624 /* Rotate the first entry last of non-pinned groups */ 1623 /*
1625 list_rotate_left(&ctx->flexible_groups); 1624 * Rotate the first entry last of non-pinned groups. Rotation might be
1625 * disabled by the inheritance code.
1626 */
1627 if (!ctx->rotate_disable)
1628 list_rotate_left(&ctx->flexible_groups);
1626 1629
1627 raw_spin_unlock(&ctx->lock); 1630 raw_spin_unlock(&ctx->lock);
1628} 1631}
@@ -2234,11 +2237,6 @@ int perf_event_release_kernel(struct perf_event *event)
2234 raw_spin_unlock_irq(&ctx->lock); 2237 raw_spin_unlock_irq(&ctx->lock);
2235 mutex_unlock(&ctx->mutex); 2238 mutex_unlock(&ctx->mutex);
2236 2239
2237 mutex_lock(&event->owner->perf_event_mutex);
2238 list_del_init(&event->owner_entry);
2239 mutex_unlock(&event->owner->perf_event_mutex);
2240 put_task_struct(event->owner);
2241
2242 free_event(event); 2240 free_event(event);
2243 2241
2244 return 0; 2242 return 0;
@@ -2251,9 +2249,43 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
2251static int perf_release(struct inode *inode, struct file *file) 2249static int perf_release(struct inode *inode, struct file *file)
2252{ 2250{
2253 struct perf_event *event = file->private_data; 2251 struct perf_event *event = file->private_data;
2252 struct task_struct *owner;
2254 2253
2255 file->private_data = NULL; 2254 file->private_data = NULL;
2256 2255
2256 rcu_read_lock();
2257 owner = ACCESS_ONCE(event->owner);
2258 /*
2259 * Matches the smp_wmb() in perf_event_exit_task(). If we observe
2260 * !owner it means the list deletion is complete and we can indeed
2261 * free this event, otherwise we need to serialize on
2262 * owner->perf_event_mutex.
2263 */
2264 smp_read_barrier_depends();
2265 if (owner) {
2266 /*
2267 * Since delayed_put_task_struct() also drops the last
2268 * task reference we can safely take a new reference
2269 * while holding the rcu_read_lock().
2270 */
2271 get_task_struct(owner);
2272 }
2273 rcu_read_unlock();
2274
2275 if (owner) {
2276 mutex_lock(&owner->perf_event_mutex);
2277 /*
2278 * We have to re-check the event->owner field, if it is cleared
2279 * we raced with perf_event_exit_task(), acquiring the mutex
2280 * ensured they're done, and we can proceed with freeing the
2281 * event.
2282 */
2283 if (event->owner)
2284 list_del_init(&event->owner_entry);
2285 mutex_unlock(&owner->perf_event_mutex);
2286 put_task_struct(owner);
2287 }
2288
2257 return perf_event_release_kernel(event); 2289 return perf_event_release_kernel(event);
2258} 2290}
2259 2291
@@ -5677,7 +5709,7 @@ SYSCALL_DEFINE5(perf_event_open,
5677 mutex_unlock(&ctx->mutex); 5709 mutex_unlock(&ctx->mutex);
5678 5710
5679 event->owner = current; 5711 event->owner = current;
5680 get_task_struct(current); 5712
5681 mutex_lock(&current->perf_event_mutex); 5713 mutex_lock(&current->perf_event_mutex);
5682 list_add_tail(&event->owner_entry, &current->perf_event_list); 5714 list_add_tail(&event->owner_entry, &current->perf_event_list);
5683 mutex_unlock(&current->perf_event_mutex); 5715 mutex_unlock(&current->perf_event_mutex);
@@ -5745,12 +5777,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
5745 ++ctx->generation; 5777 ++ctx->generation;
5746 mutex_unlock(&ctx->mutex); 5778 mutex_unlock(&ctx->mutex);
5747 5779
5748 event->owner = current;
5749 get_task_struct(current);
5750 mutex_lock(&current->perf_event_mutex);
5751 list_add_tail(&event->owner_entry, &current->perf_event_list);
5752 mutex_unlock(&current->perf_event_mutex);
5753
5754 return event; 5780 return event;
5755 5781
5756err_free: 5782err_free:
@@ -5901,8 +5927,24 @@ again:
5901 */ 5927 */
5902void perf_event_exit_task(struct task_struct *child) 5928void perf_event_exit_task(struct task_struct *child)
5903{ 5929{
5930 struct perf_event *event, *tmp;
5904 int ctxn; 5931 int ctxn;
5905 5932
5933 mutex_lock(&child->perf_event_mutex);
5934 list_for_each_entry_safe(event, tmp, &child->perf_event_list,
5935 owner_entry) {
5936 list_del_init(&event->owner_entry);
5937
5938 /*
5939 * Ensure the list deletion is visible before we clear
5940 * the owner, closes a race against perf_release() where
5941 * we need to serialize on the owner->perf_event_mutex.
5942 */
5943 smp_wmb();
5944 event->owner = NULL;
5945 }
5946 mutex_unlock(&child->perf_event_mutex);
5947
5906 for_each_task_context_nr(ctxn) 5948 for_each_task_context_nr(ctxn)
5907 perf_event_exit_task_context(child, ctxn); 5949 perf_event_exit_task_context(child, ctxn);
5908} 5950}
@@ -6122,6 +6164,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
6122 struct perf_event *event; 6164 struct perf_event *event;
6123 struct task_struct *parent = current; 6165 struct task_struct *parent = current;
6124 int inherited_all = 1; 6166 int inherited_all = 1;
6167 unsigned long flags;
6125 int ret = 0; 6168 int ret = 0;
6126 6169
6127 child->perf_event_ctxp[ctxn] = NULL; 6170 child->perf_event_ctxp[ctxn] = NULL;
@@ -6162,6 +6205,15 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
6162 break; 6205 break;
6163 } 6206 }
6164 6207
6208 /*
6209 * We can't hold ctx->lock when iterating the ->flexible_group list due
6210 * to allocations, but we need to prevent rotation because
6211 * rotate_ctx() will change the list from interrupt context.
6212 */
6213 raw_spin_lock_irqsave(&parent_ctx->lock, flags);
6214 parent_ctx->rotate_disable = 1;
6215 raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
6216
6165 list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { 6217 list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {
6166 ret = inherit_task_group(event, parent, parent_ctx, 6218 ret = inherit_task_group(event, parent, parent_ctx,
6167 child, ctxn, &inherited_all); 6219 child, ctxn, &inherited_all);
@@ -6169,6 +6221,10 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
6169 break; 6221 break;
6170 } 6222 }
6171 6223
6224 raw_spin_lock_irqsave(&parent_ctx->lock, flags);
6225 parent_ctx->rotate_disable = 0;
6226 raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
6227
6172 child_ctx = child->perf_event_ctxp[ctxn]; 6228 child_ctx = child->perf_event_ctxp[ctxn];
6173 6229
6174 if (child_ctx && inherited_all) { 6230 if (child_ctx && inherited_all) {
@@ -6321,6 +6377,8 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
6321 6377
6322void __init perf_event_init(void) 6378void __init perf_event_init(void)
6323{ 6379{
6380 int ret;
6381
6324 perf_event_init_all_cpus(); 6382 perf_event_init_all_cpus();
6325 init_srcu_struct(&pmus_srcu); 6383 init_srcu_struct(&pmus_srcu);
6326 perf_pmu_register(&perf_swevent); 6384 perf_pmu_register(&perf_swevent);
@@ -6328,4 +6386,7 @@ void __init perf_event_init(void)
6328 perf_pmu_register(&perf_task_clock); 6386 perf_pmu_register(&perf_task_clock);
6329 perf_tp_register(); 6387 perf_tp_register();
6330 perf_cpu_notifier(perf_cpu_notify); 6388 perf_cpu_notifier(perf_cpu_notify);
6389
6390 ret = init_hw_breakpoint();
6391 WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
6331} 6392}
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index c7a8f453919e..aeaa7f846821 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -121,10 +121,10 @@ static inline int pm_qos_get_value(struct pm_qos_object *o)
121 121
122 switch (o->type) { 122 switch (o->type) {
123 case PM_QOS_MIN: 123 case PM_QOS_MIN:
124 return plist_last(&o->requests)->prio; 124 return plist_first(&o->requests)->prio;
125 125
126 case PM_QOS_MAX: 126 case PM_QOS_MAX:
127 return plist_first(&o->requests)->prio; 127 return plist_last(&o->requests)->prio;
128 128
129 default: 129 default:
130 /* runtime check for not using enum */ 130 /* runtime check for not using enum */
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 6842eeba5879..05bb7173850e 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -37,13 +37,13 @@ static int check_clock(const clockid_t which_clock)
37 if (pid == 0) 37 if (pid == 0)
38 return 0; 38 return 0;
39 39
40 read_lock(&tasklist_lock); 40 rcu_read_lock();
41 p = find_task_by_vpid(pid); 41 p = find_task_by_vpid(pid);
42 if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ? 42 if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ?
43 same_thread_group(p, current) : thread_group_leader(p))) { 43 same_thread_group(p, current) : has_group_leader_pid(p))) {
44 error = -EINVAL; 44 error = -EINVAL;
45 } 45 }
46 read_unlock(&tasklist_lock); 46 rcu_read_unlock();
47 47
48 return error; 48 return error;
49} 49}
@@ -390,7 +390,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
390 390
391 INIT_LIST_HEAD(&new_timer->it.cpu.entry); 391 INIT_LIST_HEAD(&new_timer->it.cpu.entry);
392 392
393 read_lock(&tasklist_lock); 393 rcu_read_lock();
394 if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) { 394 if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) {
395 if (pid == 0) { 395 if (pid == 0) {
396 p = current; 396 p = current;
@@ -404,7 +404,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
404 p = current->group_leader; 404 p = current->group_leader;
405 } else { 405 } else {
406 p = find_task_by_vpid(pid); 406 p = find_task_by_vpid(pid);
407 if (p && !thread_group_leader(p)) 407 if (p && !has_group_leader_pid(p))
408 p = NULL; 408 p = NULL;
409 } 409 }
410 } 410 }
@@ -414,7 +414,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
414 } else { 414 } else {
415 ret = -EINVAL; 415 ret = -EINVAL;
416 } 416 }
417 read_unlock(&tasklist_lock); 417 rcu_read_unlock();
418 418
419 return ret; 419 return ret;
420} 420}
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 29bff6117abc..a5aff3ebad38 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -246,9 +246,13 @@ config PM_OPS
246 depends on PM_SLEEP || PM_RUNTIME 246 depends on PM_SLEEP || PM_RUNTIME
247 default y 247 default y
248 248
249config ARCH_HAS_OPP
250 bool
251
249config PM_OPP 252config PM_OPP
250 bool "Operating Performance Point (OPP) Layer library" 253 bool "Operating Performance Point (OPP) Layer library"
251 depends on PM 254 depends on PM
255 depends on ARCH_HAS_OPP
252 ---help--- 256 ---help---
253 SOCs have a standard set of tuples consisting of frequency and 257 SOCs have a standard set of tuples consisting of frequency and
254 voltage pairs that the device will support per voltage domain. This 258 voltage pairs that the device will support per voltage domain. This
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 657272e91d0a..048d0b514831 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -327,7 +327,6 @@ static int create_image(int platform_mode)
327int hibernation_snapshot(int platform_mode) 327int hibernation_snapshot(int platform_mode)
328{ 328{
329 int error; 329 int error;
330 gfp_t saved_mask;
331 330
332 error = platform_begin(platform_mode); 331 error = platform_begin(platform_mode);
333 if (error) 332 if (error)
@@ -339,7 +338,7 @@ int hibernation_snapshot(int platform_mode)
339 goto Close; 338 goto Close;
340 339
341 suspend_console(); 340 suspend_console();
342 saved_mask = clear_gfp_allowed_mask(GFP_IOFS); 341 pm_restrict_gfp_mask();
343 error = dpm_suspend_start(PMSG_FREEZE); 342 error = dpm_suspend_start(PMSG_FREEZE);
344 if (error) 343 if (error)
345 goto Recover_platform; 344 goto Recover_platform;
@@ -348,7 +347,10 @@ int hibernation_snapshot(int platform_mode)
348 goto Recover_platform; 347 goto Recover_platform;
349 348
350 error = create_image(platform_mode); 349 error = create_image(platform_mode);
351 /* Control returns here after successful restore */ 350 /*
351 * Control returns here (1) after the image has been created or the
352 * image creation has failed and (2) after a successful restore.
353 */
352 354
353 Resume_devices: 355 Resume_devices:
354 /* We may need to release the preallocated image pages here. */ 356 /* We may need to release the preallocated image pages here. */
@@ -357,7 +359,10 @@ int hibernation_snapshot(int platform_mode)
357 359
358 dpm_resume_end(in_suspend ? 360 dpm_resume_end(in_suspend ?
359 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); 361 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
360 set_gfp_allowed_mask(saved_mask); 362
363 if (error || !in_suspend)
364 pm_restore_gfp_mask();
365
361 resume_console(); 366 resume_console();
362 Close: 367 Close:
363 platform_end(platform_mode); 368 platform_end(platform_mode);
@@ -452,17 +457,16 @@ static int resume_target_kernel(bool platform_mode)
452int hibernation_restore(int platform_mode) 457int hibernation_restore(int platform_mode)
453{ 458{
454 int error; 459 int error;
455 gfp_t saved_mask;
456 460
457 pm_prepare_console(); 461 pm_prepare_console();
458 suspend_console(); 462 suspend_console();
459 saved_mask = clear_gfp_allowed_mask(GFP_IOFS); 463 pm_restrict_gfp_mask();
460 error = dpm_suspend_start(PMSG_QUIESCE); 464 error = dpm_suspend_start(PMSG_QUIESCE);
461 if (!error) { 465 if (!error) {
462 error = resume_target_kernel(platform_mode); 466 error = resume_target_kernel(platform_mode);
463 dpm_resume_end(PMSG_RECOVER); 467 dpm_resume_end(PMSG_RECOVER);
464 } 468 }
465 set_gfp_allowed_mask(saved_mask); 469 pm_restore_gfp_mask();
466 resume_console(); 470 resume_console();
467 pm_restore_console(); 471 pm_restore_console();
468 return error; 472 return error;
@@ -476,7 +480,6 @@ int hibernation_restore(int platform_mode)
476int hibernation_platform_enter(void) 480int hibernation_platform_enter(void)
477{ 481{
478 int error; 482 int error;
479 gfp_t saved_mask;
480 483
481 if (!hibernation_ops) 484 if (!hibernation_ops)
482 return -ENOSYS; 485 return -ENOSYS;
@@ -492,7 +495,6 @@ int hibernation_platform_enter(void)
492 495
493 entering_platform_hibernation = true; 496 entering_platform_hibernation = true;
494 suspend_console(); 497 suspend_console();
495 saved_mask = clear_gfp_allowed_mask(GFP_IOFS);
496 error = dpm_suspend_start(PMSG_HIBERNATE); 498 error = dpm_suspend_start(PMSG_HIBERNATE);
497 if (error) { 499 if (error) {
498 if (hibernation_ops->recover) 500 if (hibernation_ops->recover)
@@ -536,7 +538,6 @@ int hibernation_platform_enter(void)
536 Resume_devices: 538 Resume_devices:
537 entering_platform_hibernation = false; 539 entering_platform_hibernation = false;
538 dpm_resume_end(PMSG_RESTORE); 540 dpm_resume_end(PMSG_RESTORE);
539 set_gfp_allowed_mask(saved_mask);
540 resume_console(); 541 resume_console();
541 542
542 Close: 543 Close:
@@ -646,6 +647,7 @@ int hibernate(void)
646 swsusp_free(); 647 swsusp_free();
647 if (!error) 648 if (!error)
648 power_down(); 649 power_down();
650 pm_restore_gfp_mask();
649 } else { 651 } else {
650 pr_debug("PM: Image restored successfully.\n"); 652 pr_debug("PM: Image restored successfully.\n");
651 } 653 }
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 7335952ee473..ecf770509d0d 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -197,7 +197,6 @@ static int suspend_enter(suspend_state_t state)
197int suspend_devices_and_enter(suspend_state_t state) 197int suspend_devices_and_enter(suspend_state_t state)
198{ 198{
199 int error; 199 int error;
200 gfp_t saved_mask;
201 200
202 if (!suspend_ops) 201 if (!suspend_ops)
203 return -ENOSYS; 202 return -ENOSYS;
@@ -208,7 +207,7 @@ int suspend_devices_and_enter(suspend_state_t state)
208 goto Close; 207 goto Close;
209 } 208 }
210 suspend_console(); 209 suspend_console();
211 saved_mask = clear_gfp_allowed_mask(GFP_IOFS); 210 pm_restrict_gfp_mask();
212 suspend_test_start(); 211 suspend_test_start();
213 error = dpm_suspend_start(PMSG_SUSPEND); 212 error = dpm_suspend_start(PMSG_SUSPEND);
214 if (error) { 213 if (error) {
@@ -225,7 +224,7 @@ int suspend_devices_and_enter(suspend_state_t state)
225 suspend_test_start(); 224 suspend_test_start();
226 dpm_resume_end(PMSG_RESUME); 225 dpm_resume_end(PMSG_RESUME);
227 suspend_test_finish("resume devices"); 226 suspend_test_finish("resume devices");
228 set_gfp_allowed_mask(saved_mask); 227 pm_restore_gfp_mask();
229 resume_console(); 228 resume_console();
230 Close: 229 Close:
231 if (suspend_ops->end) 230 if (suspend_ops->end)
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index a0e4a86ccf94..baf667bb2794 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -6,6 +6,7 @@
6 * 6 *
7 * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz> 7 * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz>
8 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> 8 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
9 * Copyright (C) 2010 Bojan Smojver <bojan@rexursive.com>
9 * 10 *
10 * This file is released under the GPLv2. 11 * This file is released under the GPLv2.
11 * 12 *
@@ -753,30 +754,43 @@ static int load_image_lzo(struct swap_map_handle *handle,
753{ 754{
754 unsigned int m; 755 unsigned int m;
755 int error = 0; 756 int error = 0;
757 struct bio *bio;
756 struct timeval start; 758 struct timeval start;
757 struct timeval stop; 759 struct timeval stop;
758 unsigned nr_pages; 760 unsigned nr_pages;
759 size_t off, unc_len, cmp_len; 761 size_t i, off, unc_len, cmp_len;
760 unsigned char *unc, *cmp, *page; 762 unsigned char *unc, *cmp, *page[LZO_CMP_PAGES];
761 763
762 page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); 764 for (i = 0; i < LZO_CMP_PAGES; i++) {
763 if (!page) { 765 page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
764 printk(KERN_ERR "PM: Failed to allocate LZO page\n"); 766 if (!page[i]) {
765 return -ENOMEM; 767 printk(KERN_ERR "PM: Failed to allocate LZO page\n");
768
769 while (i)
770 free_page((unsigned long)page[--i]);
771
772 return -ENOMEM;
773 }
766 } 774 }
767 775
768 unc = vmalloc(LZO_UNC_SIZE); 776 unc = vmalloc(LZO_UNC_SIZE);
769 if (!unc) { 777 if (!unc) {
770 printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n"); 778 printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n");
771 free_page((unsigned long)page); 779
780 for (i = 0; i < LZO_CMP_PAGES; i++)
781 free_page((unsigned long)page[i]);
782
772 return -ENOMEM; 783 return -ENOMEM;
773 } 784 }
774 785
775 cmp = vmalloc(LZO_CMP_SIZE); 786 cmp = vmalloc(LZO_CMP_SIZE);
776 if (!cmp) { 787 if (!cmp) {
777 printk(KERN_ERR "PM: Failed to allocate LZO compressed\n"); 788 printk(KERN_ERR "PM: Failed to allocate LZO compressed\n");
789
778 vfree(unc); 790 vfree(unc);
779 free_page((unsigned long)page); 791 for (i = 0; i < LZO_CMP_PAGES; i++)
792 free_page((unsigned long)page[i]);
793
780 return -ENOMEM; 794 return -ENOMEM;
781 } 795 }
782 796
@@ -787,6 +801,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
787 if (!m) 801 if (!m)
788 m = 1; 802 m = 1;
789 nr_pages = 0; 803 nr_pages = 0;
804 bio = NULL;
790 do_gettimeofday(&start); 805 do_gettimeofday(&start);
791 806
792 error = snapshot_write_next(snapshot); 807 error = snapshot_write_next(snapshot);
@@ -794,11 +809,11 @@ static int load_image_lzo(struct swap_map_handle *handle,
794 goto out_finish; 809 goto out_finish;
795 810
796 for (;;) { 811 for (;;) {
797 error = swap_read_page(handle, page, NULL); /* sync */ 812 error = swap_read_page(handle, page[0], NULL); /* sync */
798 if (error) 813 if (error)
799 break; 814 break;
800 815
801 cmp_len = *(size_t *)page; 816 cmp_len = *(size_t *)page[0];
802 if (unlikely(!cmp_len || 817 if (unlikely(!cmp_len ||
803 cmp_len > lzo1x_worst_compress(LZO_UNC_SIZE))) { 818 cmp_len > lzo1x_worst_compress(LZO_UNC_SIZE))) {
804 printk(KERN_ERR "PM: Invalid LZO compressed length\n"); 819 printk(KERN_ERR "PM: Invalid LZO compressed length\n");
@@ -806,13 +821,20 @@ static int load_image_lzo(struct swap_map_handle *handle,
806 break; 821 break;
807 } 822 }
808 823
809 memcpy(cmp, page, PAGE_SIZE); 824 for (off = PAGE_SIZE, i = 1;
810 for (off = PAGE_SIZE; off < LZO_HEADER + cmp_len; off += PAGE_SIZE) { 825 off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
811 error = swap_read_page(handle, page, NULL); /* sync */ 826 error = swap_read_page(handle, page[i], &bio);
812 if (error) 827 if (error)
813 goto out_finish; 828 goto out_finish;
829 }
814 830
815 memcpy(cmp + off, page, PAGE_SIZE); 831 error = hib_wait_on_bio_chain(&bio); /* need all data now */
832 if (error)
833 goto out_finish;
834
835 for (off = 0, i = 0;
836 off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
837 memcpy(cmp + off, page[i], PAGE_SIZE);
816 } 838 }
817 839
818 unc_len = LZO_UNC_SIZE; 840 unc_len = LZO_UNC_SIZE;
@@ -857,7 +879,8 @@ out_finish:
857 879
858 vfree(cmp); 880 vfree(cmp);
859 vfree(unc); 881 vfree(unc);
860 free_page((unsigned long)page); 882 for (i = 0; i < LZO_CMP_PAGES; i++)
883 free_page((unsigned long)page[i]);
861 884
862 return error; 885 return error;
863} 886}
diff --git a/kernel/power/user.c b/kernel/power/user.c
index e819e17877ca..1b2ea31e6bd8 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -263,6 +263,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
263 case SNAPSHOT_UNFREEZE: 263 case SNAPSHOT_UNFREEZE:
264 if (!data->frozen || data->ready) 264 if (!data->frozen || data->ready)
265 break; 265 break;
266 pm_restore_gfp_mask();
266 thaw_processes(); 267 thaw_processes();
267 usermodehelper_enable(); 268 usermodehelper_enable();
268 data->frozen = 0; 269 data->frozen = 0;
@@ -275,6 +276,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
275 error = -EPERM; 276 error = -EPERM;
276 break; 277 break;
277 } 278 }
279 pm_restore_gfp_mask();
278 error = hibernation_snapshot(data->platform_support); 280 error = hibernation_snapshot(data->platform_support);
279 if (!error) 281 if (!error)
280 error = put_user(in_suspend, (int __user *)arg); 282 error = put_user(in_suspend, (int __user *)arg);
diff --git a/kernel/sched.c b/kernel/sched.c
index aa14a56f9d03..dc91a4d09ac3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -560,18 +560,8 @@ struct rq {
560 560
561static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); 561static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
562 562
563static inline
564void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
565{
566 rq->curr->sched_class->check_preempt_curr(rq, p, flags);
567 563
568 /* 564static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
569 * A queue event has occurred, and we're going to schedule. In
570 * this case, we can save a useless back to back clock update.
571 */
572 if (test_tsk_need_resched(p))
573 rq->skip_clock_update = 1;
574}
575 565
576static inline int cpu_of(struct rq *rq) 566static inline int cpu_of(struct rq *rq)
577{ 567{
@@ -2118,6 +2108,31 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
2118 p->sched_class->prio_changed(rq, p, oldprio, running); 2108 p->sched_class->prio_changed(rq, p, oldprio, running);
2119} 2109}
2120 2110
2111static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
2112{
2113 const struct sched_class *class;
2114
2115 if (p->sched_class == rq->curr->sched_class) {
2116 rq->curr->sched_class->check_preempt_curr(rq, p, flags);
2117 } else {
2118 for_each_class(class) {
2119 if (class == rq->curr->sched_class)
2120 break;
2121 if (class == p->sched_class) {
2122 resched_task(rq->curr);
2123 break;
2124 }
2125 }
2126 }
2127
2128 /*
2129 * A queue event has occurred, and we're going to schedule. In
2130 * this case, we can save a useless back to back clock update.
2131 */
2132 if (test_tsk_need_resched(rq->curr))
2133 rq->skip_clock_update = 1;
2134}
2135
2121#ifdef CONFIG_SMP 2136#ifdef CONFIG_SMP
2122/* 2137/*
2123 * Is this task likely cache-hot: 2138 * Is this task likely cache-hot:
@@ -6960,6 +6975,8 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
6960 if (cpu != group_first_cpu(sd->groups)) 6975 if (cpu != group_first_cpu(sd->groups))
6961 return; 6976 return;
6962 6977
6978 sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups));
6979
6963 child = sd->child; 6980 child = sd->child;
6964 6981
6965 sd->groups->cpu_power = 0; 6982 sd->groups->cpu_power = 0;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f4f6a8326dd0..00ebd7686676 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1654,12 +1654,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
1654 struct cfs_rq *cfs_rq = task_cfs_rq(curr); 1654 struct cfs_rq *cfs_rq = task_cfs_rq(curr);
1655 int scale = cfs_rq->nr_running >= sched_nr_latency; 1655 int scale = cfs_rq->nr_running >= sched_nr_latency;
1656 1656
1657 if (unlikely(rt_prio(p->prio)))
1658 goto preempt;
1659
1660 if (unlikely(p->sched_class != &fair_sched_class))
1661 return;
1662
1663 if (unlikely(se == pse)) 1657 if (unlikely(se == pse))
1664 return; 1658 return;
1665 1659
@@ -1764,10 +1758,6 @@ static void pull_task(struct rq *src_rq, struct task_struct *p,
1764 set_task_cpu(p, this_cpu); 1758 set_task_cpu(p, this_cpu);
1765 activate_task(this_rq, p, 0); 1759 activate_task(this_rq, p, 0);
1766 check_preempt_curr(this_rq, p, 0); 1760 check_preempt_curr(this_rq, p, 0);
1767
1768 /* re-arm NEWIDLE balancing when moving tasks */
1769 src_rq->avg_idle = this_rq->avg_idle = 2*sysctl_sched_migration_cost;
1770 this_rq->idle_stamp = 0;
1771} 1761}
1772 1762
1773/* 1763/*
@@ -2035,13 +2025,16 @@ struct sd_lb_stats {
2035 unsigned long this_load_per_task; 2025 unsigned long this_load_per_task;
2036 unsigned long this_nr_running; 2026 unsigned long this_nr_running;
2037 unsigned long this_has_capacity; 2027 unsigned long this_has_capacity;
2028 unsigned int this_idle_cpus;
2038 2029
2039 /* Statistics of the busiest group */ 2030 /* Statistics of the busiest group */
2031 unsigned int busiest_idle_cpus;
2040 unsigned long max_load; 2032 unsigned long max_load;
2041 unsigned long busiest_load_per_task; 2033 unsigned long busiest_load_per_task;
2042 unsigned long busiest_nr_running; 2034 unsigned long busiest_nr_running;
2043 unsigned long busiest_group_capacity; 2035 unsigned long busiest_group_capacity;
2044 unsigned long busiest_has_capacity; 2036 unsigned long busiest_has_capacity;
2037 unsigned int busiest_group_weight;
2045 2038
2046 int group_imb; /* Is there imbalance in this sd */ 2039 int group_imb; /* Is there imbalance in this sd */
2047#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) 2040#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
@@ -2063,6 +2056,8 @@ struct sg_lb_stats {
2063 unsigned long sum_nr_running; /* Nr tasks running in the group */ 2056 unsigned long sum_nr_running; /* Nr tasks running in the group */
2064 unsigned long sum_weighted_load; /* Weighted load of group's tasks */ 2057 unsigned long sum_weighted_load; /* Weighted load of group's tasks */
2065 unsigned long group_capacity; 2058 unsigned long group_capacity;
2059 unsigned long idle_cpus;
2060 unsigned long group_weight;
2066 int group_imb; /* Is there an imbalance in the group ? */ 2061 int group_imb; /* Is there an imbalance in the group ? */
2067 int group_has_capacity; /* Is there extra capacity in the group? */ 2062 int group_has_capacity; /* Is there extra capacity in the group? */
2068}; 2063};
@@ -2431,7 +2426,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
2431 sgs->group_load += load; 2426 sgs->group_load += load;
2432 sgs->sum_nr_running += rq->nr_running; 2427 sgs->sum_nr_running += rq->nr_running;
2433 sgs->sum_weighted_load += weighted_cpuload(i); 2428 sgs->sum_weighted_load += weighted_cpuload(i);
2434 2429 if (idle_cpu(i))
2430 sgs->idle_cpus++;
2435 } 2431 }
2436 2432
2437 /* 2433 /*
@@ -2469,6 +2465,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
2469 sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); 2465 sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
2470 if (!sgs->group_capacity) 2466 if (!sgs->group_capacity)
2471 sgs->group_capacity = fix_small_capacity(sd, group); 2467 sgs->group_capacity = fix_small_capacity(sd, group);
2468 sgs->group_weight = group->group_weight;
2472 2469
2473 if (sgs->group_capacity > sgs->sum_nr_running) 2470 if (sgs->group_capacity > sgs->sum_nr_running)
2474 sgs->group_has_capacity = 1; 2471 sgs->group_has_capacity = 1;
@@ -2576,13 +2573,16 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
2576 sds->this_nr_running = sgs.sum_nr_running; 2573 sds->this_nr_running = sgs.sum_nr_running;
2577 sds->this_load_per_task = sgs.sum_weighted_load; 2574 sds->this_load_per_task = sgs.sum_weighted_load;
2578 sds->this_has_capacity = sgs.group_has_capacity; 2575 sds->this_has_capacity = sgs.group_has_capacity;
2576 sds->this_idle_cpus = sgs.idle_cpus;
2579 } else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) { 2577 } else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) {
2580 sds->max_load = sgs.avg_load; 2578 sds->max_load = sgs.avg_load;
2581 sds->busiest = sg; 2579 sds->busiest = sg;
2582 sds->busiest_nr_running = sgs.sum_nr_running; 2580 sds->busiest_nr_running = sgs.sum_nr_running;
2581 sds->busiest_idle_cpus = sgs.idle_cpus;
2583 sds->busiest_group_capacity = sgs.group_capacity; 2582 sds->busiest_group_capacity = sgs.group_capacity;
2584 sds->busiest_load_per_task = sgs.sum_weighted_load; 2583 sds->busiest_load_per_task = sgs.sum_weighted_load;
2585 sds->busiest_has_capacity = sgs.group_has_capacity; 2584 sds->busiest_has_capacity = sgs.group_has_capacity;
2585 sds->busiest_group_weight = sgs.group_weight;
2586 sds->group_imb = sgs.group_imb; 2586 sds->group_imb = sgs.group_imb;
2587 } 2587 }
2588 2588
@@ -2860,8 +2860,26 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2860 if (sds.this_load >= sds.avg_load) 2860 if (sds.this_load >= sds.avg_load)
2861 goto out_balanced; 2861 goto out_balanced;
2862 2862
2863 if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) 2863 /*
2864 goto out_balanced; 2864 * In the CPU_NEWLY_IDLE, use imbalance_pct to be conservative.
2865 * And to check for busy balance use !idle_cpu instead of
2866 * CPU_NOT_IDLE. This is because HT siblings will use CPU_NOT_IDLE
2867 * even when they are idle.
2868 */
2869 if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu)) {
2870 if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
2871 goto out_balanced;
2872 } else {
2873 /*
2874 * This cpu is idle. If the busiest group load doesn't
2875 * have more tasks than the number of available cpu's and
2876 * there is no imbalance between this and busiest group
2877 * wrt to idle cpu's, it is balanced.
2878 */
2879 if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) &&
2880 sds.busiest_nr_running <= sds.busiest_group_weight)
2881 goto out_balanced;
2882 }
2865 2883
2866force_balance: 2884force_balance:
2867 /* Looks like there is an imbalance. Compute it */ 2885 /* Looks like there is an imbalance. Compute it */
@@ -3197,8 +3215,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
3197 interval = msecs_to_jiffies(sd->balance_interval); 3215 interval = msecs_to_jiffies(sd->balance_interval);
3198 if (time_after(next_balance, sd->last_balance + interval)) 3216 if (time_after(next_balance, sd->last_balance + interval))
3199 next_balance = sd->last_balance + interval; 3217 next_balance = sd->last_balance + interval;
3200 if (pulled_task) 3218 if (pulled_task) {
3219 this_rq->idle_stamp = 0;
3201 break; 3220 break;
3221 }
3202 } 3222 }
3203 3223
3204 raw_spin_lock(&this_rq->lock); 3224 raw_spin_lock(&this_rq->lock);
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c
index 45bddc0c1048..2bf6b47058c1 100644
--- a/kernel/sched_stoptask.c
+++ b/kernel/sched_stoptask.c
@@ -19,14 +19,14 @@ select_task_rq_stop(struct rq *rq, struct task_struct *p,
19static void 19static void
20check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags) 20check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags)
21{ 21{
22 resched_task(rq->curr); /* we preempt everything */ 22 /* we're never preempted */
23} 23}
24 24
25static struct task_struct *pick_next_task_stop(struct rq *rq) 25static struct task_struct *pick_next_task_stop(struct rq *rq)
26{ 26{
27 struct task_struct *stop = rq->stop; 27 struct task_struct *stop = rq->stop;
28 28
29 if (stop && stop->state == TASK_RUNNING) 29 if (stop && stop->se.on_rq)
30 return stop; 30 return stop;
31 31
32 return NULL; 32 return NULL;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b65bf634035e..5abfa1518554 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -702,7 +702,6 @@ static struct ctl_table kern_table[] = {
702 .extra1 = &zero, 702 .extra1 = &zero,
703 .extra2 = &ten_thousand, 703 .extra2 = &ten_thousand,
704 }, 704 },
705#endif
706 { 705 {
707 .procname = "dmesg_restrict", 706 .procname = "dmesg_restrict",
708 .data = &dmesg_restrict, 707 .data = &dmesg_restrict,
@@ -712,6 +711,7 @@ static struct ctl_table kern_table[] = {
712 .extra1 = &zero, 711 .extra1 = &zero,
713 .extra2 = &one, 712 .extra2 = &one,
714 }, 713 },
714#endif
715 { 715 {
716 .procname = "ngroups_max", 716 .procname = "ngroups_max",
717 .data = &ngroups_max, 717 .data = &ngroups_max,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e04b8bcdef88..ea37e2ff4164 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -126,7 +126,7 @@ if FTRACE
126config FUNCTION_TRACER 126config FUNCTION_TRACER
127 bool "Kernel Function Tracer" 127 bool "Kernel Function Tracer"
128 depends on HAVE_FUNCTION_TRACER 128 depends on HAVE_FUNCTION_TRACER
129 select FRAME_POINTER if (!ARM_UNWIND) 129 select FRAME_POINTER if !ARM_UNWIND && !S390
130 select KALLSYMS 130 select KALLSYMS
131 select GENERIC_TRACER 131 select GENERIC_TRACER
132 select CONTEXT_SWITCH_TRACER 132 select CONTEXT_SWITCH_TRACER
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 82d9b8106cd0..c380612273bf 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -17,7 +17,6 @@
17#include <linux/writeback.h> 17#include <linux/writeback.h>
18#include <linux/kallsyms.h> 18#include <linux/kallsyms.h>
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/smp_lock.h>
21#include <linux/notifier.h> 20#include <linux/notifier.h>
22#include <linux/irqflags.h> 21#include <linux/irqflags.h>
23#include <linux/debugfs.h> 22#include <linux/debugfs.h>
@@ -1284,6 +1283,8 @@ void trace_dump_stack(void)
1284 __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count()); 1283 __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
1285} 1284}
1286 1285
1286static DEFINE_PER_CPU(int, user_stack_count);
1287
1287void 1288void
1288ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) 1289ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1289{ 1290{
@@ -1302,6 +1303,18 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1302 if (unlikely(in_nmi())) 1303 if (unlikely(in_nmi()))
1303 return; 1304 return;
1304 1305
1306 /*
1307 * prevent recursion, since the user stack tracing may
1308 * trigger other kernel events.
1309 */
1310 preempt_disable();
1311 if (__this_cpu_read(user_stack_count))
1312 goto out;
1313
1314 __this_cpu_inc(user_stack_count);
1315
1316
1317
1305 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, 1318 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1306 sizeof(*entry), flags, pc); 1319 sizeof(*entry), flags, pc);
1307 if (!event) 1320 if (!event)
@@ -1319,6 +1332,11 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1319 save_stack_trace_user(&trace); 1332 save_stack_trace_user(&trace);
1320 if (!filter_check_discard(call, entry, buffer, event)) 1333 if (!filter_check_discard(call, entry, buffer, event))
1321 ring_buffer_unlock_commit(buffer, event); 1334 ring_buffer_unlock_commit(buffer, event);
1335
1336 __this_cpu_dec(user_stack_count);
1337
1338 out:
1339 preempt_enable();
1322} 1340}
1323 1341
1324#ifdef UNUSED 1342#ifdef UNUSED