Diffstat (limited to 'kernel')
-rw-r--r--  kernel/debug/kdb/kdb_main.c   21
-rw-r--r--  kernel/futex.c                 3
-rw-r--r--  kernel/futex_compat.c          3
-rw-r--r--  kernel/hw_breakpoint.c         3
-rw-r--r--  kernel/irq_work.c              4
-rw-r--r--  kernel/module.c               12
-rw-r--r--  kernel/perf_event.c           93
-rw-r--r--  kernel/pm_qos_params.c         4
-rw-r--r--  kernel/posix-cpu-timers.c     12
-rw-r--r--  kernel/power/Kconfig           4
-rw-r--r--  kernel/sched.c                39
-rw-r--r--  kernel/sched_fair.c           48
-rw-r--r--  kernel/sched_stoptask.c        4
-rw-r--r--  kernel/sysctl.c                2
-rw-r--r--  kernel/trace/Kconfig           2
-rw-r--r--  kernel/trace/trace.c          20
16 files changed, 205 insertions, 69 deletions
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 37755d621924..a6e729766821 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -82,7 +82,7 @@ static kdbtab_t kdb_base_commands[50];
 #define for_each_kdbcmd(cmd, num)					\
	for ((cmd) = kdb_base_commands, (num) = 0;			\
	     num < kdb_max_commands;					\
-	     num == KDB_BASE_CMD_MAX ? cmd = kdb_commands : cmd++, num++)
+	     num++, num == KDB_BASE_CMD_MAX ? cmd = kdb_commands : cmd++)
 
 typedef struct _kdbmsg {
 	int	km_diag;	/* kdb diagnostic */
@@ -646,7 +646,7 @@ static int kdb_defcmd2(const char *cmdstr, const char *argv0)
 	}
 	if (!s->usable)
 		return KDB_NOTIMP;
-	s->command = kmalloc((s->count + 1) * sizeof(*(s->command)), GFP_KDB);
+	s->command = kzalloc((s->count + 1) * sizeof(*(s->command)), GFP_KDB);
 	if (!s->command) {
 		kdb_printf("Could not allocate new kdb_defcmd table for %s\n",
 			   cmdstr);
@@ -2361,7 +2361,7 @@ static int kdb_pid(int argc, const char **argv)
  */
 static int kdb_ll(int argc, const char **argv)
 {
-	int diag;
+	int diag = 0;
 	unsigned long addr;
 	long offset = 0;
 	unsigned long va;
@@ -2400,20 +2400,21 @@ static int kdb_ll(int argc, const char **argv)
 		char buf[80];
 
 		if (KDB_FLAG(CMD_INTERRUPT))
-			return 0;
+			goto out;
 
 		sprintf(buf, "%s " kdb_machreg_fmt "\n", command, va);
 		diag = kdb_parse(buf);
 		if (diag)
-			return diag;
+			goto out;
 
 		addr = va + linkoffset;
 		if (kdb_getword(&va, addr, sizeof(va)))
-			return 0;
+			goto out;
 	}
-	kfree(command);
 
-	return 0;
+out:
+	kfree(command);
+	return diag;
 }
 
 static int kdb_kgdb(int argc, const char **argv)
@@ -2739,13 +2740,13 @@ int kdb_register_repeat(char *cmd,
 	}
 	if (kdb_commands) {
 		memcpy(new, kdb_commands,
-		       kdb_max_commands * sizeof(*new));
+		       (kdb_max_commands - KDB_BASE_CMD_MAX) * sizeof(*new));
 		kfree(kdb_commands);
 	}
 	memset(new + kdb_max_commands, 0,
 	       kdb_command_extend * sizeof(*new));
 	kdb_commands = new;
-	kp = kdb_commands + kdb_max_commands;
+	kp = kdb_commands + kdb_max_commands - KDB_BASE_CMD_MAX;
 	kdb_max_commands += kdb_command_extend;
 }
 
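Note on the for_each_kdbcmd() fix above: with the old operand order, cmd++ ran while num was still one short of KDB_BASE_CMD_MAX, so the cursor stepped one element past kdb_base_commands before the switch to the extended table. A minimal userspace sketch of the corrected traversal (table sizes and contents here are made up; plain ints stand in for kdbtab_t):

#include <stdio.h>

#define KDB_BASE_CMD_MAX 4	/* made-up size; the real table holds 50 */

static int kdb_base_commands[KDB_BASE_CMD_MAX] = { 0, 1, 2, 3 };
static int kdb_commands[4] = { 4, 5, 6, 7 };	/* the extended table */
static int kdb_max_commands = KDB_BASE_CMD_MAX + 4;

/* fixed order: bump num first, then either switch tables or advance cmd */
#define for_each_kdbcmd(cmd, num)					\
	for ((cmd) = kdb_base_commands, (num) = 0;			\
	     num < kdb_max_commands;					\
	     num++, num == KDB_BASE_CMD_MAX ? cmd = kdb_commands : cmd++)

int main(void)
{
	int *cmd, num;

	/* prints 0..7; the old operand order advanced cmd one element
	 * past kdb_base_commands before num reached KDB_BASE_CMD_MAX */
	for_each_kdbcmd(cmd, num)
		printf("%d ", *cmd);
	printf("\n");
	return 0;
}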
diff --git a/kernel/futex.c b/kernel/futex.c
index 6c683b37f2ce..40a8777a27d0 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2489,7 +2489,8 @@ void exit_robust_list(struct task_struct *curr)
 {
 	struct robust_list_head __user *head = curr->robust_list;
 	struct robust_list __user *entry, *next_entry, *pending;
-	unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip;
+	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
+	unsigned int uninitialized_var(next_pi);
 	unsigned long futex_offset;
 	int rc;
 
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 06da4dfc339b..a7934ac75e5b 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -49,7 +49,8 @@ void compat_exit_robust_list(struct task_struct *curr)
 {
 	struct compat_robust_list_head __user *head = curr->compat_robust_list;
 	struct robust_list __user *entry, *next_entry, *pending;
-	unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip;
+	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
+	unsigned int uninitialized_var(next_pi);
 	compat_uptr_t uentry, next_uentry, upending;
 	compat_long_t futex_offset;
 	int rc;
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 2c9120f0afca..e5325825aeb6 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -620,7 +620,7 @@ static struct pmu perf_breakpoint = {
 	.read		= hw_breakpoint_pmu_read,
 };
 
-static int __init init_hw_breakpoint(void)
+int __init init_hw_breakpoint(void)
 {
 	unsigned int **task_bp_pinned;
 	int cpu, err_cpu;
@@ -655,6 +655,5 @@ static int __init init_hw_breakpoint(void)
 
 	return -ENOMEM;
 }
-core_initcall(init_hw_breakpoint);
 
 
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index f16763ff8481..90f881904bb1 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -145,7 +145,9 @@ void irq_work_run(void)
 		 * Clear the BUSY bit and return to the free state if
 		 * no-one else claimed it meanwhile.
 		 */
-		cmpxchg(&entry->next, next_flags(NULL, IRQ_WORK_BUSY), NULL);
+		(void)cmpxchg(&entry->next,
+			      next_flags(NULL, IRQ_WORK_BUSY),
+			      NULL);
 	}
 }
 EXPORT_SYMBOL_GPL(irq_work_run);
diff --git a/kernel/module.c b/kernel/module.c
index 437a74a7524a..d190664f25ff 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2326,6 +2326,18 @@ static void find_module_sections(struct module *mod, struct load_info *info)
 	kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) *
 			   mod->num_trace_events, GFP_KERNEL);
 #endif
+#ifdef CONFIG_TRACING
+	mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt",
+					 sizeof(*mod->trace_bprintk_fmt_start),
+					 &mod->num_trace_bprintk_fmt);
+	/*
+	 * This section contains pointers to allocated objects in the trace
+	 * code and not scanning it leads to false positives.
+	 */
+	kmemleak_scan_area(mod->trace_bprintk_fmt_start,
+			   sizeof(*mod->trace_bprintk_fmt_start) *
+			   mod->num_trace_bprintk_fmt, GFP_KERNEL);
+#endif
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 	/* sechdrs[0].sh_size is always zero */
 	mod->ftrace_callsites = section_objs(info, "__mcount_loc",
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index cb6c0d2af68f..eac7e3364335 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -31,6 +31,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/perf_event.h>
 #include <linux/ftrace_event.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/irq_regs.h>
 
@@ -1286,8 +1287,6 @@ void __perf_event_task_sched_out(struct task_struct *task,
 {
 	int ctxn;
 
-	perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
-
 	for_each_task_context_nr(ctxn)
 		perf_event_context_sched_out(task, ctxn, next);
 }
@@ -1621,8 +1620,12 @@ static void rotate_ctx(struct perf_event_context *ctx)
 {
 	raw_spin_lock(&ctx->lock);
 
-	/* Rotate the first entry last of non-pinned groups */
-	list_rotate_left(&ctx->flexible_groups);
+	/*
+	 * Rotate the first entry last of non-pinned groups. Rotation might be
+	 * disabled by the inheritance code.
+	 */
+	if (!ctx->rotate_disable)
+		list_rotate_left(&ctx->flexible_groups);
 
 	raw_spin_unlock(&ctx->lock);
 }
@@ -2234,11 +2237,6 @@ int perf_event_release_kernel(struct perf_event *event)
 	raw_spin_unlock_irq(&ctx->lock);
 	mutex_unlock(&ctx->mutex);
 
-	mutex_lock(&event->owner->perf_event_mutex);
-	list_del_init(&event->owner_entry);
-	mutex_unlock(&event->owner->perf_event_mutex);
-	put_task_struct(event->owner);
-
 	free_event(event);
 
 	return 0;
@@ -2251,9 +2249,43 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
 static int perf_release(struct inode *inode, struct file *file)
 {
 	struct perf_event *event = file->private_data;
+	struct task_struct *owner;
 
 	file->private_data = NULL;
 
+	rcu_read_lock();
+	owner = ACCESS_ONCE(event->owner);
+	/*
+	 * Matches the smp_wmb() in perf_event_exit_task(). If we observe
+	 * !owner it means the list deletion is complete and we can indeed
+	 * free this event, otherwise we need to serialize on
+	 * owner->perf_event_mutex.
+	 */
+	smp_read_barrier_depends();
+	if (owner) {
+		/*
+		 * Since delayed_put_task_struct() also drops the last
+		 * task reference we can safely take a new reference
+		 * while holding the rcu_read_lock().
+		 */
+		get_task_struct(owner);
+	}
+	rcu_read_unlock();
+
+	if (owner) {
+		mutex_lock(&owner->perf_event_mutex);
+		/*
+		 * We have to re-check the event->owner field, if it is cleared
+		 * we raced with perf_event_exit_task(), acquiring the mutex
+		 * ensured they're done, and we can proceed with freeing the
+		 * event.
+		 */
+		if (event->owner)
+			list_del_init(&event->owner_entry);
+		mutex_unlock(&owner->perf_event_mutex);
+		put_task_struct(owner);
+	}
+
 	return perf_event_release_kernel(event);
 }
 
@@ -5677,7 +5709,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	mutex_unlock(&ctx->mutex);
 
 	event->owner = current;
-	get_task_struct(current);
+
 	mutex_lock(&current->perf_event_mutex);
 	list_add_tail(&event->owner_entry, &current->perf_event_list);
 	mutex_unlock(&current->perf_event_mutex);
@@ -5745,12 +5777,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 	++ctx->generation;
 	mutex_unlock(&ctx->mutex);
 
-	event->owner = current;
-	get_task_struct(current);
-	mutex_lock(&current->perf_event_mutex);
-	list_add_tail(&event->owner_entry, &current->perf_event_list);
-	mutex_unlock(&current->perf_event_mutex);
-
 	return event;
 
 err_free:
@@ -5901,8 +5927,24 @@ again:
  */
 void perf_event_exit_task(struct task_struct *child)
 {
+	struct perf_event *event, *tmp;
 	int ctxn;
 
+	mutex_lock(&child->perf_event_mutex);
+	list_for_each_entry_safe(event, tmp, &child->perf_event_list,
+				 owner_entry) {
+		list_del_init(&event->owner_entry);
+
+		/*
+		 * Ensure the list deletion is visible before we clear
+		 * the owner, closes a race against perf_release() where
+		 * we need to serialize on the owner->perf_event_mutex.
+		 */
+		smp_wmb();
+		event->owner = NULL;
+	}
+	mutex_unlock(&child->perf_event_mutex);
+
 	for_each_task_context_nr(ctxn)
 		perf_event_exit_task_context(child, ctxn);
 }
@@ -6122,6 +6164,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
 	struct perf_event *event;
 	struct task_struct *parent = current;
 	int inherited_all = 1;
+	unsigned long flags;
 	int ret = 0;
 
 	child->perf_event_ctxp[ctxn] = NULL;
@@ -6162,6 +6205,15 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
 			break;
 	}
 
+	/*
+	 * We can't hold ctx->lock when iterating the ->flexible_group list due
+	 * to allocations, but we need to prevent rotation because
+	 * rotate_ctx() will change the list from interrupt context.
+	 */
+	raw_spin_lock_irqsave(&parent_ctx->lock, flags);
+	parent_ctx->rotate_disable = 1;
+	raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
+
 	list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {
 		ret = inherit_task_group(event, parent, parent_ctx,
 					 child, ctxn, &inherited_all);
@@ -6169,6 +6221,10 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
 			break;
 	}
 
+	raw_spin_lock_irqsave(&parent_ctx->lock, flags);
+	parent_ctx->rotate_disable = 0;
+	raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
+
 	child_ctx = child->perf_event_ctxp[ctxn];
 
 	if (child_ctx && inherited_all) {
@@ -6321,6 +6377,8 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 
 void __init perf_event_init(void)
 {
+	int ret;
+
 	perf_event_init_all_cpus();
 	init_srcu_struct(&pmus_srcu);
 	perf_pmu_register(&perf_swevent);
@@ -6328,4 +6386,7 @@ void __init perf_event_init(void)
 	perf_pmu_register(&perf_task_clock);
 	perf_tp_register();
 	perf_cpu_notifier(perf_cpu_notify);
+
+	ret = init_hw_breakpoint();
+	WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
 }
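The perf_release()/perf_event_exit_task() hunks above form a publish-then-recheck handshake on event->owner. Below is a simplified userspace analogue: C11 acquire/release atomics stand in for the kernel's RCU plus smp_wmb()/smp_read_barrier_depends(), the task refcounting (get_task_struct()/put_task_struct()) is elided, and every name is invented for illustration:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_task {
	pthread_mutex_t mutex;			/* plays perf_event_mutex */
};

struct fake_event {
	_Atomic(struct fake_task *) owner;	/* plays event->owner */
	bool on_list;				/* plays the owner_entry linkage */
};

/* exiting-task side: unlink, then publish owner == NULL; the release
 * store plays the smp_wmb() in perf_event_exit_task() */
static void exit_side(struct fake_task *task, struct fake_event *event)
{
	pthread_mutex_lock(&task->mutex);
	event->on_list = false;			/* list_del_init() */
	atomic_store_explicit(&event->owner, NULL, memory_order_release);
	pthread_mutex_unlock(&task->mutex);
}

/* perf_release() side: snapshot owner, then re-check it under the mutex */
static void release_side(struct fake_event *event)
{
	struct fake_task *owner =
		atomic_load_explicit(&event->owner, memory_order_acquire);

	if (owner) {
		pthread_mutex_lock(&owner->mutex);
		/* if owner was cleared meanwhile, exit_side() already
		 * unlinked the event; otherwise it is our job */
		if (atomic_load_explicit(&event->owner, memory_order_relaxed))
			event->on_list = false;
		pthread_mutex_unlock(&owner->mutex);
	}
	/* either way the event is off the list and safe to free */
}

int main(void)
{
	struct fake_task task = { .mutex = PTHREAD_MUTEX_INITIALIZER };
	struct fake_event event = { .on_list = true };

	atomic_store(&event.owner, &task);
	exit_side(&task, &event);	/* the exiting task wins the race */
	release_side(&event);		/* sees owner == NULL, skips the lock */
	printf("on_list = %d\n", event.on_list);
	return 0;
}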
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index c7a8f453919e..aeaa7f846821 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -121,10 +121,10 @@ static inline int pm_qos_get_value(struct pm_qos_object *o)
 
 	switch (o->type) {
 	case PM_QOS_MIN:
-		return plist_last(&o->requests)->prio;
+		return plist_first(&o->requests)->prio;
 
 	case PM_QOS_MAX:
-		return plist_first(&o->requests)->prio;
+		return plist_last(&o->requests)->prio;
 
 	default:
 		/* runtime check for not using enum */
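The pm_qos swap is the right fix because a kernel plist keeps its nodes sorted by ascending ->prio: the head of the list holds the smallest value and the tail the largest, so PM_QOS_MIN must read plist_first() and PM_QOS_MAX plist_last(). A toy sketch with a sorted array standing in for the plist (values are made up):

#include <stdio.h>

/* a plist keeps nodes sorted by ascending ->prio, so the head holds the
 * smallest value and the tail the largest -- hence the first/last swap */
static const int requests[] = { 10, 20, 40 };	/* made-up prio values */
#define NREQ (sizeof(requests) / sizeof(requests[0]))

int main(void)
{
	printf("PM_QOS_MIN -> %d\n", requests[0]);		/* plist_first: 10 */
	printf("PM_QOS_MAX -> %d\n", requests[NREQ - 1]);	/* plist_last: 40 */
	return 0;
}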
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 6842eeba5879..05bb7173850e 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -37,13 +37,13 @@ static int check_clock(const clockid_t which_clock)
 	if (pid == 0)
 		return 0;
 
-	read_lock(&tasklist_lock);
+	rcu_read_lock();
 	p = find_task_by_vpid(pid);
 	if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ?
-		   same_thread_group(p, current) : thread_group_leader(p))) {
+		   same_thread_group(p, current) : has_group_leader_pid(p))) {
 		error = -EINVAL;
 	}
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 
 	return error;
 }
@@ -390,7 +390,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
 
 	INIT_LIST_HEAD(&new_timer->it.cpu.entry);
 
-	read_lock(&tasklist_lock);
+	rcu_read_lock();
 	if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) {
 		if (pid == 0) {
 			p = current;
@@ -404,7 +404,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
 			p = current->group_leader;
 		} else {
 			p = find_task_by_vpid(pid);
-			if (p && !thread_group_leader(p))
+			if (p && !has_group_leader_pid(p))
 				p = NULL;
 		}
 	}
@@ -414,7 +414,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
 	} else {
 		ret = -EINVAL;
 	}
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 
 	return ret;
 }
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 29bff6117abc..a5aff3ebad38 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -246,9 +246,13 @@ config PM_OPS
 	depends on PM_SLEEP || PM_RUNTIME
 	default y
 
+config ARCH_HAS_OPP
+	bool
+
 config PM_OPP
 	bool "Operating Performance Point (OPP) Layer library"
 	depends on PM
+	depends on ARCH_HAS_OPP
 	---help---
 	  SOCs have a standard set of tuples consisting of frequency and
 	  voltage pairs that the device will support per voltage domain. This
diff --git a/kernel/sched.c b/kernel/sched.c
index aa14a56f9d03..dc91a4d09ac3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -560,18 +560,8 @@ struct rq {
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 
-static inline
-void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
-{
-	rq->curr->sched_class->check_preempt_curr(rq, p, flags);
 
-	/*
-	 * A queue event has occurred, and we're going to schedule. In
-	 * this case, we can save a useless back to back clock update.
-	 */
-	if (test_tsk_need_resched(p))
-		rq->skip_clock_update = 1;
-}
+static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
 
 static inline int cpu_of(struct rq *rq)
 {
@@ -2118,6 +2108,31 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
 		p->sched_class->prio_changed(rq, p, oldprio, running);
 }
 
+static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
+{
+	const struct sched_class *class;
+
+	if (p->sched_class == rq->curr->sched_class) {
+		rq->curr->sched_class->check_preempt_curr(rq, p, flags);
+	} else {
+		for_each_class(class) {
+			if (class == rq->curr->sched_class)
+				break;
+			if (class == p->sched_class) {
+				resched_task(rq->curr);
+				break;
+			}
+		}
+	}
+
+	/*
+	 * A queue event has occurred, and we're going to schedule. In
+	 * this case, we can save a useless back to back clock update.
+	 */
+	if (test_tsk_need_resched(rq->curr))
+		rq->skip_clock_update = 1;
+}
+
 #ifdef CONFIG_SMP
 /*
  * Is this task likely cache-hot:
@@ -6960,6 +6975,8 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 	if (cpu != group_first_cpu(sd->groups))
 		return;
 
+	sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups));
+
 	child = sd->child;
 
 	sd->groups->cpu_power = 0;
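The new check_preempt_curr() resolves cross-class wakeups by walking the scheduling classes in their fixed priority order: whichever class is reached first outranks the other. A standalone sketch of that ordering test, with class names as strings purely for illustration:

#include <stdio.h>
#include <string.h>

/* descending priority, mirroring the kernel's stop->rt->fair->idle chain
 * that for_each_class() walks */
static const char *const classes[] = { "stop", "rt", "fair", "idle" };

/* 1 when the waking task's class outranks the running task's class */
static int cross_class_preempts(const char *curr, const char *waking)
{
	for (unsigned i = 0; i < sizeof(classes) / sizeof(classes[0]); i++) {
		if (strcmp(classes[i], curr) == 0)
			return 0;	/* curr found first: no preemption */
		if (strcmp(classes[i], waking) == 0)
			return 1;	/* waking found first: resched_task() */
	}
	return 0;
}

int main(void)
{
	printf("rt task waking on a fair cpu:  %d\n",
	       cross_class_preempts("fair", "rt"));	/* 1: preempt */
	printf("fair task waking on an rt cpu: %d\n",
	       cross_class_preempts("rt", "fair"));	/* 0: wait */
	return 0;
}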
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f4f6a8326dd0..00ebd7686676 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1654,12 +1654,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
 	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
 	int scale = cfs_rq->nr_running >= sched_nr_latency;
 
-	if (unlikely(rt_prio(p->prio)))
-		goto preempt;
-
-	if (unlikely(p->sched_class != &fair_sched_class))
-		return;
-
 	if (unlikely(se == pse))
 		return;
 
@@ -1764,10 +1758,6 @@ static void pull_task(struct rq *src_rq, struct task_struct *p,
 	set_task_cpu(p, this_cpu);
 	activate_task(this_rq, p, 0);
 	check_preempt_curr(this_rq, p, 0);
-
-	/* re-arm NEWIDLE balancing when moving tasks */
-	src_rq->avg_idle = this_rq->avg_idle = 2*sysctl_sched_migration_cost;
-	this_rq->idle_stamp = 0;
 }
 
 /*
@@ -2035,13 +2025,16 @@ struct sd_lb_stats {
 	unsigned long this_load_per_task;
 	unsigned long this_nr_running;
 	unsigned long this_has_capacity;
+	unsigned int  this_idle_cpus;
 
 	/* Statistics of the busiest group */
+	unsigned int  busiest_idle_cpus;
 	unsigned long max_load;
 	unsigned long busiest_load_per_task;
 	unsigned long busiest_nr_running;
 	unsigned long busiest_group_capacity;
 	unsigned long busiest_has_capacity;
+	unsigned int  busiest_group_weight;
 
 	int group_imb; /* Is there imbalance in this sd */
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
@@ -2063,6 +2056,8 @@ struct sg_lb_stats {
 	unsigned long sum_nr_running; /* Nr tasks running in the group */
 	unsigned long sum_weighted_load; /* Weighted load of group's tasks */
 	unsigned long group_capacity;
+	unsigned long idle_cpus;
+	unsigned long group_weight;
 	int group_imb; /* Is there an imbalance in the group ? */
 	int group_has_capacity; /* Is there extra capacity in the group? */
 };
@@ -2431,7 +2426,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
 		sgs->group_load += load;
 		sgs->sum_nr_running += rq->nr_running;
 		sgs->sum_weighted_load += weighted_cpuload(i);
-
+		if (idle_cpu(i))
+			sgs->idle_cpus++;
 	}
 
 	/*
@@ -2469,6 +2465,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
 	sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
 	if (!sgs->group_capacity)
 		sgs->group_capacity = fix_small_capacity(sd, group);
+	sgs->group_weight = group->group_weight;
 
 	if (sgs->group_capacity > sgs->sum_nr_running)
 		sgs->group_has_capacity = 1;
@@ -2576,13 +2573,16 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
 		sds->this_nr_running = sgs.sum_nr_running;
 		sds->this_load_per_task = sgs.sum_weighted_load;
 		sds->this_has_capacity = sgs.group_has_capacity;
+		sds->this_idle_cpus = sgs.idle_cpus;
 	} else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) {
 		sds->max_load = sgs.avg_load;
 		sds->busiest = sg;
 		sds->busiest_nr_running = sgs.sum_nr_running;
+		sds->busiest_idle_cpus = sgs.idle_cpus;
 		sds->busiest_group_capacity = sgs.group_capacity;
 		sds->busiest_load_per_task = sgs.sum_weighted_load;
 		sds->busiest_has_capacity = sgs.group_has_capacity;
+		sds->busiest_group_weight = sgs.group_weight;
 		sds->group_imb = sgs.group_imb;
 	}
 
@@ -2860,8 +2860,26 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 	if (sds.this_load >= sds.avg_load)
 		goto out_balanced;
 
-	if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
-		goto out_balanced;
+	/*
+	 * In the CPU_NEWLY_IDLE case, use imbalance_pct to be conservative.
+	 * And to check for busy balance use !idle_cpu instead of
+	 * CPU_NOT_IDLE. This is because HT siblings will use CPU_NOT_IDLE
+	 * even when they are idle.
+	 */
+	if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu)) {
+		if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
+			goto out_balanced;
+	} else {
+		/*
+		 * This cpu is idle. If the busiest group load doesn't
+		 * have more tasks than the number of available cpu's and
+		 * there is no imbalance between this and busiest group
+		 * wrt to idle cpu's, it is balanced.
+		 */
+		if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) &&
+		    sds.busiest_nr_running <= sds.busiest_group_weight)
+			goto out_balanced;
+	}
 
 force_balance:
 	/* Looks like there is an imbalance. Compute it */
@@ -3197,8 +3215,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
 		interval = msecs_to_jiffies(sd->balance_interval);
 		if (time_after(next_balance, sd->last_balance + interval))
 			next_balance = sd->last_balance + interval;
-		if (pulled_task)
+		if (pulled_task) {
+			this_rq->idle_stamp = 0;
 			break;
+		}
 	}
 
 	raw_spin_lock(&this_rq->lock);
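The find_busiest_group() hunk above splits the out_balanced test on the state of this_cpu: busy or newly idle CPUs keep the conservative imbalance_pct check, while a genuinely idle CPU only backs off when the busiest group is not oversubscribed and idle CPUs are spread evenly. A condensed, self-contained model of that decision (struct and function names here are ad hoc, not the kernel's):

#include <stdio.h>

struct stats {
	unsigned long max_load, this_load;
	unsigned int this_idle_cpus, busiest_idle_cpus;
	unsigned long busiest_nr_running, busiest_group_weight;
};

static int should_balance(const struct stats *s, int this_cpu_idle,
			  unsigned int imbalance_pct)
{
	if (!this_cpu_idle)	/* busy or newly idle: conservative test */
		return 100 * s->max_load > imbalance_pct * s->this_load;

	/* idle cpu: balanced only if the busiest group is not
	 * oversubscribed and idle cpus are evenly spread */
	return !(s->this_idle_cpus <= s->busiest_idle_cpus + 1 &&
		 s->busiest_nr_running <= s->busiest_group_weight);
}

int main(void)
{
	struct stats s = {
		.max_load = 120, .this_load = 100,
		.this_idle_cpus = 2, .busiest_idle_cpus = 0,
		.busiest_nr_running = 3, .busiest_group_weight = 2,
	};

	/* the busiest group is oversubscribed: an idle cpu now pulls even
	 * though the imbalance_pct test alone would have said "balanced" */
	printf("busy cpu: %d\n", should_balance(&s, 0, 125));	/* 0 */
	printf("idle cpu: %d\n", should_balance(&s, 1, 125));	/* 1 */
	return 0;
}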
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c
index 45bddc0c1048..2bf6b47058c1 100644
--- a/kernel/sched_stoptask.c
+++ b/kernel/sched_stoptask.c
@@ -19,14 +19,14 @@ select_task_rq_stop(struct rq *rq, struct task_struct *p,
 static void
 check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags)
 {
-	resched_task(rq->curr); /* we preempt everything */
+	/* we're never preempted */
 }
 
 static struct task_struct *pick_next_task_stop(struct rq *rq)
 {
 	struct task_struct *stop = rq->stop;
 
-	if (stop && stop->state == TASK_RUNNING)
+	if (stop && stop->se.on_rq)
 		return stop;
 
 	return NULL;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b65bf634035e..5abfa1518554 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -702,7 +702,6 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &ten_thousand,
 	},
-#endif
 	{
 		.procname	= "dmesg_restrict",
 		.data		= &dmesg_restrict,
@@ -712,6 +711,7 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+#endif
 	{
 		.procname	= "ngroups_max",
 		.data		= &ngroups_max,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e04b8bcdef88..ea37e2ff4164 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -126,7 +126,7 @@ if FTRACE
 config FUNCTION_TRACER
 	bool "Kernel Function Tracer"
 	depends on HAVE_FUNCTION_TRACER
-	select FRAME_POINTER if (!ARM_UNWIND)
+	select FRAME_POINTER if !ARM_UNWIND && !S390
 	select KALLSYMS
 	select GENERIC_TRACER
 	select CONTEXT_SWITCH_TRACER
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 82d9b8106cd0..c380612273bf 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -17,7 +17,6 @@
 #include <linux/writeback.h>
 #include <linux/kallsyms.h>
 #include <linux/seq_file.h>
-#include <linux/smp_lock.h>
 #include <linux/notifier.h>
 #include <linux/irqflags.h>
 #include <linux/debugfs.h>
@@ -1284,6 +1283,8 @@ void trace_dump_stack(void)
 	__ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
 }
 
+static DEFINE_PER_CPU(int, user_stack_count);
+
 void
 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
 {
@@ -1302,6 +1303,18 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
 	if (unlikely(in_nmi()))
 		return;
 
+	/*
+	 * prevent recursion, since the user stack tracing may
+	 * trigger other kernel events.
+	 */
+	preempt_disable();
+	if (__this_cpu_read(user_stack_count))
+		goto out;
+
+	__this_cpu_inc(user_stack_count);
+
+
+
 	event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
 					  sizeof(*entry), flags, pc);
 	if (!event)
@@ -1319,6 +1332,11 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
 	save_stack_trace_user(&trace);
 	if (!filter_check_discard(call, entry, buffer, event))
 		ring_buffer_unlock_commit(buffer, event);
+
+	__this_cpu_dec(user_stack_count);
+
+ out:
+	preempt_enable();
 }
1323 1341
1324#ifdef UNUSED 1342#ifdef UNUSED
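The ftrace_trace_userstack() change is a per-CPU recursion guard: a flag is raised around the buffer-reserve/commit section so that any event triggered from inside user stack tracing bails out instead of recursing. A userspace approximation, with a thread-local counter standing in for the per-CPU variable and preemption not modeled:

#include <stdio.h>

/* stands in for DEFINE_PER_CPU(int, user_stack_count) */
static _Thread_local int user_stack_count;

static void record_user_stack(int depth)
{
	if (user_stack_count)	/* already tracing here: suppress re-entry */
		return;

	user_stack_count++;

	printf("capturing user stack (depth %d)\n", depth);
	if (depth < 3)
		record_user_stack(depth + 1);	/* guard makes this a no-op */

	user_stack_count--;
}

int main(void)
{
	record_user_stack(0);	/* prints exactly once */
	return 0;
}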