author    James Morris <jmorris@namei.org>    2010-11-28 16:27:07 -0500
committer James Morris <jmorris@namei.org>    2010-11-28 16:27:07 -0500
commit    1d6d75684d869406e5bb2ac5d3ed9454f52d0cab (patch)
tree      afb229254bed6415407b7b7d4641f9f792109966 /kernel
parent    074e61ec3751da9ab88ee66d3818574556c03489 (diff)
parent    0f639a3c5ca63dd76ee07de9b02ebf0178ce9a17 (diff)
Merge branch 'master' into next
Diffstat (limited to 'kernel')
 kernel/debug/kdb/kdb_main.c |  21
 kernel/futex.c              |   3
 kernel/futex_compat.c       |   3
 kernel/hw_breakpoint.c      |   3
 kernel/irq_work.c           |   4
 kernel/latencytop.c         |  17
 kernel/module.c             |  12
 kernel/perf_event.c         | 111
 kernel/pm_qos_params.c      |   4
 kernel/posix-cpu-timers.c   |  12
 kernel/power/Kconfig        |   4
 kernel/printk.c             |  21
 kernel/range.c              |   2
 kernel/sched.c              |  39
 kernel/sched_fair.c         |  48
 kernel/sched_stoptask.c     |   4
 kernel/sysctl.c             |   9
 kernel/trace/Kconfig        |   2
 kernel/trace/blktrace.c     |   4
 kernel/trace/trace.c        |  20
20 files changed, 256 insertions, 87 deletions
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 37755d621924..a6e729766821 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -82,7 +82,7 @@ static kdbtab_t kdb_base_commands[50];
 #define for_each_kdbcmd(cmd, num)                                      \
         for ((cmd) = kdb_base_commands, (num) = 0;                     \
              num < kdb_max_commands;                                   \
-             num == KDB_BASE_CMD_MAX ? cmd = kdb_commands : cmd++, num++)
+             num++, num == KDB_BASE_CMD_MAX ? cmd = kdb_commands : cmd++)
 
 typedef struct _kdbmsg {
         int     km_diag;        /* kdb diagnostic */
@@ -646,7 +646,7 @@ static int kdb_defcmd2(const char *cmdstr, const char *argv0)
         }
         if (!s->usable)
                 return KDB_NOTIMP;
-        s->command = kmalloc((s->count + 1) * sizeof(*(s->command)), GFP_KDB);
+        s->command = kzalloc((s->count + 1) * sizeof(*(s->command)), GFP_KDB);
         if (!s->command) {
                 kdb_printf("Could not allocate new kdb_defcmd table for %s\n",
                            cmdstr);
@@ -2361,7 +2361,7 @@ static int kdb_pid(int argc, const char **argv)
  */
 static int kdb_ll(int argc, const char **argv)
 {
-        int diag;
+        int diag = 0;
         unsigned long addr;
         long offset = 0;
         unsigned long va;
@@ -2400,20 +2400,21 @@ static int kdb_ll(int argc, const char **argv)
                 char buf[80];
 
                 if (KDB_FLAG(CMD_INTERRUPT))
-                        return 0;
+                        goto out;
 
                 sprintf(buf, "%s " kdb_machreg_fmt "\n", command, va);
                 diag = kdb_parse(buf);
                 if (diag)
-                        return diag;
+                        goto out;
 
                 addr = va + linkoffset;
                 if (kdb_getword(&va, addr, sizeof(va)))
-                        return 0;
+                        goto out;
         }
-        kfree(command);
 
-        return 0;
+out:
+        kfree(command);
+        return diag;
 }
 
 static int kdb_kgdb(int argc, const char **argv)
@@ -2739,13 +2740,13 @@ int kdb_register_repeat(char *cmd,
         }
         if (kdb_commands) {
                 memcpy(new, kdb_commands,
-                       kdb_max_commands * sizeof(*new));
+                       (kdb_max_commands - KDB_BASE_CMD_MAX) * sizeof(*new));
                 kfree(kdb_commands);
         }
         memset(new + kdb_max_commands, 0,
                kdb_command_extend * sizeof(*new));
         kdb_commands = new;
-        kp = kdb_commands + kdb_max_commands;
+        kp = kdb_commands + kdb_max_commands - KDB_BASE_CMD_MAX;
         kdb_max_commands += kdb_command_extend;
         }
 
diff --git a/kernel/futex.c b/kernel/futex.c
index 6c683b37f2ce..40a8777a27d0 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2489,7 +2489,8 @@ void exit_robust_list(struct task_struct *curr)
 {
         struct robust_list_head __user *head = curr->robust_list;
         struct robust_list __user *entry, *next_entry, *pending;
-        unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip;
+        unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
+        unsigned int uninitialized_var(next_pi);
         unsigned long futex_offset;
         int rc;
 
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 06da4dfc339b..a7934ac75e5b 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -49,7 +49,8 @@ void compat_exit_robust_list(struct task_struct *curr)
 {
         struct compat_robust_list_head __user *head = curr->compat_robust_list;
         struct robust_list __user *entry, *next_entry, *pending;
-        unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip;
+        unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
+        unsigned int uninitialized_var(next_pi);
         compat_uptr_t uentry, next_uentry, upending;
         compat_long_t futex_offset;
         int rc;
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 2c9120f0afca..e5325825aeb6 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -620,7 +620,7 @@ static struct pmu perf_breakpoint = {
         .read           = hw_breakpoint_pmu_read,
 };
 
-static int __init init_hw_breakpoint(void)
+int __init init_hw_breakpoint(void)
 {
         unsigned int **task_bp_pinned;
         int cpu, err_cpu;
@@ -655,6 +655,5 @@ static int __init init_hw_breakpoint(void)
 
         return -ENOMEM;
 }
-core_initcall(init_hw_breakpoint);
 
 
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index f16763ff8481..90f881904bb1 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -145,7 +145,9 @@ void irq_work_run(void)
                  * Clear the BUSY bit and return to the free state if
                  * no-one else claimed it meanwhile.
                  */
-                cmpxchg(&entry->next, next_flags(NULL, IRQ_WORK_BUSY), NULL);
+                (void)cmpxchg(&entry->next,
+                              next_flags(NULL, IRQ_WORK_BUSY),
+                              NULL);
         }
 }
 EXPORT_SYMBOL_GPL(irq_work_run);
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index 877fb306d415..17110a4a4fc2 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -194,14 +194,7 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
 
         account_global_scheduler_latency(tsk, &lat);
 
-        /*
-         * short term hack; if we're > 32 we stop; future we recycle:
-         */
-        tsk->latency_record_count++;
-        if (tsk->latency_record_count >= LT_SAVECOUNT)
-                goto out_unlock;
-
-        for (i = 0; i < LT_SAVECOUNT; i++) {
+        for (i = 0; i < tsk->latency_record_count; i++) {
                 struct latency_record *mylat;
                 int same = 1;
 
@@ -227,8 +220,14 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
                 }
         }
 
+        /*
+         * short term hack; if we're > 32 we stop; future we recycle:
+         */
+        if (tsk->latency_record_count >= LT_SAVECOUNT)
+                goto out_unlock;
+
         /* Allocated a new one: */
-        i = tsk->latency_record_count;
+        i = tsk->latency_record_count++;
         memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record));
 
 out_unlock:
diff --git a/kernel/module.c b/kernel/module.c
index 437a74a7524a..d190664f25ff 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2326,6 +2326,18 @@ static void find_module_sections(struct module *mod, struct load_info *info)
         kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) *
                            mod->num_trace_events, GFP_KERNEL);
 #endif
+#ifdef CONFIG_TRACING
+        mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt",
+                                         sizeof(*mod->trace_bprintk_fmt_start),
+                                         &mod->num_trace_bprintk_fmt);
+        /*
+         * This section contains pointers to allocated objects in the trace
+         * code and not scanning it leads to false positives.
+         */
+        kmemleak_scan_area(mod->trace_bprintk_fmt_start,
+                           sizeof(*mod->trace_bprintk_fmt_start) *
+                           mod->num_trace_bprintk_fmt, GFP_KERNEL);
+#endif
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
         /* sechdrs[0].sh_size is always zero */
         mod->ftrace_callsites = section_objs(info, "__mcount_loc",
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 517d827f4982..671f6c8c8a32 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -31,6 +31,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/perf_event.h>
 #include <linux/ftrace_event.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/irq_regs.h>
 
@@ -674,6 +675,8 @@ event_sched_in(struct perf_event *event,
 
         event->tstamp_running += ctx->time - event->tstamp_stopped;
 
+        event->shadow_ctx_time = ctx->time - ctx->timestamp;
+
         if (!is_software_event(event))
                 cpuctx->active_oncpu++;
         ctx->nr_active++;
@@ -2232,11 +2235,6 @@ int perf_event_release_kernel(struct perf_event *event)
         raw_spin_unlock_irq(&ctx->lock);
         mutex_unlock(&ctx->mutex);
 
-        mutex_lock(&event->owner->perf_event_mutex);
-        list_del_init(&event->owner_entry);
-        mutex_unlock(&event->owner->perf_event_mutex);
-        put_task_struct(event->owner);
-
         free_event(event);
 
         return 0;
@@ -2249,9 +2247,43 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
 static int perf_release(struct inode *inode, struct file *file)
 {
         struct perf_event *event = file->private_data;
+        struct task_struct *owner;
 
         file->private_data = NULL;
 
+        rcu_read_lock();
+        owner = ACCESS_ONCE(event->owner);
+        /*
+         * Matches the smp_wmb() in perf_event_exit_task(). If we observe
+         * !owner it means the list deletion is complete and we can indeed
+         * free this event, otherwise we need to serialize on
+         * owner->perf_event_mutex.
+         */
+        smp_read_barrier_depends();
+        if (owner) {
+                /*
+                 * Since delayed_put_task_struct() also drops the last
+                 * task reference we can safely take a new reference
+                 * while holding the rcu_read_lock().
+                 */
+                get_task_struct(owner);
+        }
+        rcu_read_unlock();
+
+        if (owner) {
+                mutex_lock(&owner->perf_event_mutex);
+                /*
+                 * We have to re-check the event->owner field, if it is cleared
+                 * we raced with perf_event_exit_task(), acquiring the mutex
+                 * ensured they're done, and we can proceed with freeing the
+                 * event.
+                 */
+                if (event->owner)
+                        list_del_init(&event->owner_entry);
+                mutex_unlock(&owner->perf_event_mutex);
+                put_task_struct(owner);
+        }
+
         return perf_event_release_kernel(event);
 }
 
@@ -3396,7 +3428,8 @@ static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
 }
 
 static void perf_output_read_one(struct perf_output_handle *handle,
-                                 struct perf_event *event)
+                                 struct perf_event *event,
+                                 u64 enabled, u64 running)
 {
         u64 read_format = event->attr.read_format;
         u64 values[4];
@@ -3404,11 +3437,11 @@ static void perf_output_read_one(struct perf_output_handle *handle,
 
         values[n++] = perf_event_count(event);
         if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
-                values[n++] = event->total_time_enabled +
+                values[n++] = enabled +
                         atomic64_read(&event->child_total_time_enabled);
         }
         if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
-                values[n++] = event->total_time_running +
+                values[n++] = running +
                         atomic64_read(&event->child_total_time_running);
         }
         if (read_format & PERF_FORMAT_ID)
@@ -3421,7 +3454,8 @@ static void perf_output_read_one(struct perf_output_handle *handle,
  * XXX PERF_FORMAT_GROUP vs inherited events seems difficult.
  */
 static void perf_output_read_group(struct perf_output_handle *handle,
-                            struct perf_event *event)
+                            struct perf_event *event,
+                            u64 enabled, u64 running)
 {
         struct perf_event *leader = event->group_leader, *sub;
         u64 read_format = event->attr.read_format;
@@ -3431,10 +3465,10 @@ static void perf_output_read_group(struct perf_output_handle *handle,
         values[n++] = 1 + leader->nr_siblings;
 
         if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
-                values[n++] = leader->total_time_enabled;
+                values[n++] = enabled;
 
         if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
-                values[n++] = leader->total_time_running;
+                values[n++] = running;
 
         if (leader != event)
                 leader->pmu->read(leader);
@@ -3459,13 +3493,35 @@ static void perf_output_read_group(struct perf_output_handle *handle,
         }
 }
 
+#define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\
+                                 PERF_FORMAT_TOTAL_TIME_RUNNING)
+
 static void perf_output_read(struct perf_output_handle *handle,
                              struct perf_event *event)
 {
+        u64 enabled = 0, running = 0, now, ctx_time;
+        u64 read_format = event->attr.read_format;
+
+        /*
+         * compute total_time_enabled, total_time_running
+         * based on snapshot values taken when the event
+         * was last scheduled in.
+         *
+         * we cannot simply called update_context_time()
+         * because of locking issue as we are called in
+         * NMI context
+         */
+        if (read_format & PERF_FORMAT_TOTAL_TIMES) {
+                now = perf_clock();
+                ctx_time = event->shadow_ctx_time + now;
+                enabled = ctx_time - event->tstamp_enabled;
+                running = ctx_time - event->tstamp_running;
+        }
+
         if (event->attr.read_format & PERF_FORMAT_GROUP)
-                perf_output_read_group(handle, event);
+                perf_output_read_group(handle, event, enabled, running);
         else
-                perf_output_read_one(handle, event);
+                perf_output_read_one(handle, event, enabled, running);
 }
 
 void perf_output_sample(struct perf_output_handle *handle,
@@ -5651,7 +5707,7 @@ SYSCALL_DEFINE5(perf_event_open,
         mutex_unlock(&ctx->mutex);
 
         event->owner = current;
-        get_task_struct(current);
+
         mutex_lock(&current->perf_event_mutex);
         list_add_tail(&event->owner_entry, &current->perf_event_list);
         mutex_unlock(&current->perf_event_mutex);
@@ -5719,12 +5775,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
         ++ctx->generation;
         mutex_unlock(&ctx->mutex);
 
-        event->owner = current;
-        get_task_struct(current);
-        mutex_lock(&current->perf_event_mutex);
-        list_add_tail(&event->owner_entry, &current->perf_event_list);
-        mutex_unlock(&current->perf_event_mutex);
-
         return event;
 
 err_free:
@@ -5875,8 +5925,24 @@ again:
  */
 void perf_event_exit_task(struct task_struct *child)
 {
+        struct perf_event *event, *tmp;
         int ctxn;
 
+        mutex_lock(&child->perf_event_mutex);
+        list_for_each_entry_safe(event, tmp, &child->perf_event_list,
+                                 owner_entry) {
+                list_del_init(&event->owner_entry);
+
+                /*
+                 * Ensure the list deletion is visible before we clear
+                 * the owner, closes a race against perf_release() where
+                 * we need to serialize on the owner->perf_event_mutex.
+                 */
+                smp_wmb();
+                event->owner = NULL;
+        }
+        mutex_unlock(&child->perf_event_mutex);
+
         for_each_task_context_nr(ctxn)
                 perf_event_exit_task_context(child, ctxn);
 }
@@ -6295,6 +6361,8 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 
 void __init perf_event_init(void)
 {
+        int ret;
+
         perf_event_init_all_cpus();
         init_srcu_struct(&pmus_srcu);
         perf_pmu_register(&perf_swevent);
@@ -6302,4 +6370,7 @@ void __init perf_event_init(void)
         perf_pmu_register(&perf_task_clock);
         perf_tp_register();
         perf_cpu_notifier(perf_cpu_notify);
+
+        ret = init_hw_breakpoint();
+        WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
 }
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index c7a8f453919e..aeaa7f846821 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -121,10 +121,10 @@ static inline int pm_qos_get_value(struct pm_qos_object *o)
 
         switch (o->type) {
         case PM_QOS_MIN:
-                return plist_last(&o->requests)->prio;
+                return plist_first(&o->requests)->prio;
 
         case PM_QOS_MAX:
-                return plist_first(&o->requests)->prio;
+                return plist_last(&o->requests)->prio;
 
         default:
                 /* runtime check for not using enum */
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 6842eeba5879..05bb7173850e 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -37,13 +37,13 @@ static int check_clock(const clockid_t which_clock)
         if (pid == 0)
                 return 0;
 
-        read_lock(&tasklist_lock);
+        rcu_read_lock();
         p = find_task_by_vpid(pid);
         if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ?
-                   same_thread_group(p, current) : thread_group_leader(p))) {
+                   same_thread_group(p, current) : has_group_leader_pid(p))) {
                 error = -EINVAL;
         }
-        read_unlock(&tasklist_lock);
+        rcu_read_unlock();
 
         return error;
 }
@@ -390,7 +390,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
 
         INIT_LIST_HEAD(&new_timer->it.cpu.entry);
 
-        read_lock(&tasklist_lock);
+        rcu_read_lock();
         if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) {
                 if (pid == 0) {
                         p = current;
@@ -404,7 +404,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
                         p = current->group_leader;
                 } else {
                         p = find_task_by_vpid(pid);
-                        if (p && !thread_group_leader(p))
+                        if (p && !has_group_leader_pid(p))
                                 p = NULL;
                 }
         }
@@ -414,7 +414,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
         } else {
                 ret = -EINVAL;
         }
-        read_unlock(&tasklist_lock);
+        rcu_read_unlock();
 
         return ret;
 }
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 29bff6117abc..a5aff3ebad38 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -246,9 +246,13 @@ config PM_OPS
         depends on PM_SLEEP || PM_RUNTIME
         default y
 
+config ARCH_HAS_OPP
+        bool
+
 config PM_OPP
         bool "Operating Performance Point (OPP) Layer library"
         depends on PM
+        depends on ARCH_HAS_OPP
         ---help---
           SOCs have a standard set of tuples consisting of frequency and
           voltage pairs that the device will support per voltage domain. This
diff --git a/kernel/printk.c b/kernel/printk.c
index b2ebaee8c377..9a2264fc42ca 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -261,6 +261,12 @@ static inline void boot_delay_msec(void)
 }
 #endif
 
+#ifdef CONFIG_SECURITY_DMESG_RESTRICT
+int dmesg_restrict = 1;
+#else
+int dmesg_restrict;
+#endif
+
 int do_syslog(int type, char __user *buf, int len, bool from_file)
 {
         unsigned i, j, limit, count;
@@ -268,7 +274,20 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
         char c;
         int error = 0;
 
-        error = security_syslog(type, from_file);
+        /*
+         * If this is from /proc/kmsg we only do the capabilities checks
+         * at open time.
+         */
+        if (type == SYSLOG_ACTION_OPEN || !from_file) {
+                if (dmesg_restrict && !capable(CAP_SYS_ADMIN))
+                        return -EPERM;
+                if ((type != SYSLOG_ACTION_READ_ALL &&
+                     type != SYSLOG_ACTION_SIZE_BUFFER) &&
+                    !capable(CAP_SYS_ADMIN))
+                        return -EPERM;
+        }
+
+        error = security_syslog(type);
         if (error)
                 return error;
 
diff --git a/kernel/range.c b/kernel/range.c
index 471b66acabb5..37fa9b99ad58 100644
--- a/kernel/range.c
+++ b/kernel/range.c
@@ -119,7 +119,7 @@ static int cmp_range(const void *x1, const void *x2)
 
 int clean_sort_range(struct range *range, int az)
 {
-        int i, j, k = az - 1, nr_range = 0;
+        int i, j, k = az - 1, nr_range = az;
 
         for (i = 0; i < k; i++) {
                 if (range[i].end)
diff --git a/kernel/sched.c b/kernel/sched.c
index aa14a56f9d03..dc91a4d09ac3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -560,18 +560,8 @@ struct rq {
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 
-static inline
-void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
-{
-        rq->curr->sched_class->check_preempt_curr(rq, p, flags);
 
-        /*
-         * A queue event has occurred, and we're going to schedule. In
-         * this case, we can save a useless back to back clock update.
-         */
-        if (test_tsk_need_resched(p))
-                rq->skip_clock_update = 1;
-}
+static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
 
 static inline int cpu_of(struct rq *rq)
 {
@@ -2118,6 +2108,31 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
         p->sched_class->prio_changed(rq, p, oldprio, running);
 }
 
+static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
+{
+        const struct sched_class *class;
+
+        if (p->sched_class == rq->curr->sched_class) {
+                rq->curr->sched_class->check_preempt_curr(rq, p, flags);
+        } else {
+                for_each_class(class) {
+                        if (class == rq->curr->sched_class)
+                                break;
+                        if (class == p->sched_class) {
+                                resched_task(rq->curr);
+                                break;
+                        }
+                }
+        }
+
+        /*
+         * A queue event has occurred, and we're going to schedule. In
+         * this case, we can save a useless back to back clock update.
+         */
+        if (test_tsk_need_resched(rq->curr))
+                rq->skip_clock_update = 1;
+}
+
 #ifdef CONFIG_SMP
 /*
  * Is this task likely cache-hot:
@@ -6960,6 +6975,8 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
         if (cpu != group_first_cpu(sd->groups))
                 return;
 
+        sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups));
+
         child = sd->child;
 
         sd->groups->cpu_power = 0;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f4f6a8326dd0..00ebd7686676 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1654,12 +1654,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
         struct cfs_rq *cfs_rq = task_cfs_rq(curr);
         int scale = cfs_rq->nr_running >= sched_nr_latency;
 
-        if (unlikely(rt_prio(p->prio)))
-                goto preempt;
-
-        if (unlikely(p->sched_class != &fair_sched_class))
-                return;
-
         if (unlikely(se == pse))
                 return;
 
@@ -1764,10 +1758,6 @@ static void pull_task(struct rq *src_rq, struct task_struct *p,
         set_task_cpu(p, this_cpu);
         activate_task(this_rq, p, 0);
         check_preempt_curr(this_rq, p, 0);
-
-        /* re-arm NEWIDLE balancing when moving tasks */
-        src_rq->avg_idle = this_rq->avg_idle = 2*sysctl_sched_migration_cost;
-        this_rq->idle_stamp = 0;
 }
 
 /*
@@ -2035,13 +2025,16 @@ struct sd_lb_stats {
         unsigned long this_load_per_task;
         unsigned long this_nr_running;
         unsigned long this_has_capacity;
+        unsigned int  this_idle_cpus;
 
         /* Statistics of the busiest group */
+        unsigned int  busiest_idle_cpus;
         unsigned long max_load;
         unsigned long busiest_load_per_task;
         unsigned long busiest_nr_running;
         unsigned long busiest_group_capacity;
         unsigned long busiest_has_capacity;
+        unsigned int  busiest_group_weight;
 
         int group_imb; /* Is there imbalance in this sd */
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
@@ -2063,6 +2056,8 @@ struct sg_lb_stats {
         unsigned long sum_nr_running; /* Nr tasks running in the group */
         unsigned long sum_weighted_load; /* Weighted load of group's tasks */
         unsigned long group_capacity;
+        unsigned long idle_cpus;
+        unsigned long group_weight;
         int group_imb; /* Is there an imbalance in the group ? */
         int group_has_capacity; /* Is there extra capacity in the group? */
 };
@@ -2431,7 +2426,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
                 sgs->group_load += load;
                 sgs->sum_nr_running += rq->nr_running;
                 sgs->sum_weighted_load += weighted_cpuload(i);
-
+                if (idle_cpu(i))
+                        sgs->idle_cpus++;
         }
 
         /*
@@ -2469,6 +2465,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
         sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
         if (!sgs->group_capacity)
                 sgs->group_capacity = fix_small_capacity(sd, group);
+        sgs->group_weight = group->group_weight;
 
         if (sgs->group_capacity > sgs->sum_nr_running)
                 sgs->group_has_capacity = 1;
@@ -2576,13 +2573,16 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
                 sds->this_nr_running = sgs.sum_nr_running;
                 sds->this_load_per_task = sgs.sum_weighted_load;
                 sds->this_has_capacity = sgs.group_has_capacity;
+                sds->this_idle_cpus = sgs.idle_cpus;
         } else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) {
                 sds->max_load = sgs.avg_load;
                 sds->busiest = sg;
                 sds->busiest_nr_running = sgs.sum_nr_running;
+                sds->busiest_idle_cpus = sgs.idle_cpus;
                 sds->busiest_group_capacity = sgs.group_capacity;
                 sds->busiest_load_per_task = sgs.sum_weighted_load;
                 sds->busiest_has_capacity = sgs.group_has_capacity;
+                sds->busiest_group_weight = sgs.group_weight;
                 sds->group_imb = sgs.group_imb;
         }
 
@@ -2860,8 +2860,26 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
         if (sds.this_load >= sds.avg_load)
                 goto out_balanced;
 
-        if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
-                goto out_balanced;
+        /*
+         * In the CPU_NEWLY_IDLE, use imbalance_pct to be conservative.
+         * And to check for busy balance use !idle_cpu instead of
+         * CPU_NOT_IDLE. This is because HT siblings will use CPU_NOT_IDLE
+         * even when they are idle.
+         */
+        if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu)) {
+                if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
+                        goto out_balanced;
+        } else {
+                /*
+                 * This cpu is idle. If the busiest group load doesn't
+                 * have more tasks than the number of available cpu's and
+                 * there is no imbalance between this and busiest group
+                 * wrt to idle cpu's, it is balanced.
+                 */
+                if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) &&
+                    sds.busiest_nr_running <= sds.busiest_group_weight)
+                        goto out_balanced;
+        }
 
 force_balance:
         /* Looks like there is an imbalance. Compute it */
@@ -3197,8 +3215,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
                 interval = msecs_to_jiffies(sd->balance_interval);
                 if (time_after(next_balance, sd->last_balance + interval))
                         next_balance = sd->last_balance + interval;
-                if (pulled_task)
+                if (pulled_task) {
+                        this_rq->idle_stamp = 0;
                         break;
+                }
         }
 
         raw_spin_lock(&this_rq->lock);
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c
index 45bddc0c1048..2bf6b47058c1 100644
--- a/kernel/sched_stoptask.c
+++ b/kernel/sched_stoptask.c
@@ -19,14 +19,14 @@ select_task_rq_stop(struct rq *rq, struct task_struct *p,
 static void
 check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags)
 {
-        resched_task(rq->curr); /* we preempt everything */
+        /* we're never preempted */
 }
 
 static struct task_struct *pick_next_task_stop(struct rq *rq)
 {
         struct task_struct *stop = rq->stop;
 
-        if (stop && stop->state == TASK_RUNNING)
+        if (stop && stop->se.on_rq)
                 return stop;
 
         return NULL;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c33a1edb799f..5abfa1518554 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -702,6 +702,15 @@ static struct ctl_table kern_table[] = {
                 .extra1         = &zero,
                 .extra2         = &ten_thousand,
         },
+        {
+                .procname       = "dmesg_restrict",
+                .data           = &dmesg_restrict,
+                .maxlen         = sizeof(int),
+                .mode           = 0644,
+                .proc_handler   = proc_dointvec_minmax,
+                .extra1         = &zero,
+                .extra2         = &one,
+        },
 #endif
         {
                 .procname       = "ngroups_max",
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e04b8bcdef88..ea37e2ff4164 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -126,7 +126,7 @@ if FTRACE
 config FUNCTION_TRACER
         bool "Kernel Function Tracer"
         depends on HAVE_FUNCTION_TRACER
-        select FRAME_POINTER if (!ARM_UNWIND)
+        select FRAME_POINTER if !ARM_UNWIND && !S390
         select KALLSYMS
         select GENERIC_TRACER
         select CONTEXT_SWITCH_TRACER
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index bc251ed66724..7b8ec0281548 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -168,7 +168,6 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
 static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
                                  BLK_TC_ACT(BLK_TC_WRITE) };
 
-#define BLK_TC_HARDBARRIER      BLK_TC_BARRIER
 #define BLK_TC_RAHEAD           BLK_TC_AHEAD
 
 /* The ilog2() calls fall out because they're constant */
@@ -196,7 +195,6 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
                 return;
 
         what |= ddir_act[rw & WRITE];
-        what |= MASK_TC_BIT(rw, HARDBARRIER);
         what |= MASK_TC_BIT(rw, SYNC);
         what |= MASK_TC_BIT(rw, RAHEAD);
         what |= MASK_TC_BIT(rw, META);
@@ -1807,8 +1805,6 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
 
         if (rw & REQ_RAHEAD)
                 rwbs[i++] = 'A';
-        if (rw & REQ_HARDBARRIER)
-                rwbs[i++] = 'B';
         if (rw & REQ_SYNC)
                 rwbs[i++] = 'S';
         if (rw & REQ_META)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 82d9b8106cd0..c380612273bf 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -17,7 +17,6 @@
 #include <linux/writeback.h>
 #include <linux/kallsyms.h>
 #include <linux/seq_file.h>
-#include <linux/smp_lock.h>
 #include <linux/notifier.h>
 #include <linux/irqflags.h>
 #include <linux/debugfs.h>
@@ -1284,6 +1283,8 @@ void trace_dump_stack(void)
         __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
 }
 
+static DEFINE_PER_CPU(int, user_stack_count);
+
 void
 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
 {
@@ -1302,6 +1303,18 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
         if (unlikely(in_nmi()))
                 return;
 
+        /*
+         * prevent recursion, since the user stack tracing may
+         * trigger other kernel events.
+         */
+        preempt_disable();
+        if (__this_cpu_read(user_stack_count))
+                goto out;
+
+        __this_cpu_inc(user_stack_count);
+
+
+
         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
                                           sizeof(*entry), flags, pc);
         if (!event)
@@ -1319,6 +1332,11 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
         save_stack_trace_user(&trace);
         if (!filter_check_discard(call, entry, buffer, event))
                 ring_buffer_unlock_commit(buffer, event);
+
+        __this_cpu_dec(user_stack_count);
+
+ out:
+        preempt_enable();
 }
 
 #ifdef UNUSED