Diffstat (limited to 'kernel')

-rw-r--r--   kernel/cpu.c               |  10
-rw-r--r--   kernel/fork.c              |  15
-rw-r--r--   kernel/hw_breakpoint.c     |  56
-rw-r--r--   kernel/kgdb.c              |   3
-rw-r--r--   kernel/sched.c             |  39
-rw-r--r--   kernel/time/clocksource.c  |  18
-rw-r--r--   kernel/trace/Kconfig       |   4
-rw-r--r--   kernel/trace/ring_buffer.c |  24
-rw-r--r--   kernel/trace/trace.c       |   5

9 files changed, 123 insertions(+), 51 deletions(-)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 1c8ddd6ee940..677f25376a38 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -151,13 +151,13 @@ static inline void check_for_tasks(int cpu)
 
         write_lock_irq(&tasklist_lock);
         for_each_process(p) {
-                if (task_cpu(p) == cpu &&
+                if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
                     (!cputime_eq(p->utime, cputime_zero) ||
                      !cputime_eq(p->stime, cputime_zero)))
-                        printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
-                                (state = %ld, flags = %x) \n",
+                        printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
+                                "(state = %ld, flags = %x)\n",
                                 p->comm, task_pid_nr(p), cpu,
                                 p->state, p->flags);
         }
         write_unlock_irq(&tasklist_lock);
 }
diff --git a/kernel/fork.c b/kernel/fork.c
index 5b2959b3ffc2..f88bd984df35 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1241,21 +1241,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
         /* Need tasklist lock for parent etc handling! */
         write_lock_irq(&tasklist_lock);
 
-        /*
-         * The task hasn't been attached yet, so its cpus_allowed mask will
-         * not be changed, nor will its assigned CPU.
-         *
-         * The cpus_allowed mask of the parent may have changed after it was
-         * copied first time - so re-copy it here, then check the child's CPU
-         * to ensure it is on a valid CPU (and if not, just force it back to
-         * parent's CPU). This avoids alot of nasty races.
-         */
-        p->cpus_allowed = current->cpus_allowed;
-        p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed;
-        if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
-                     !cpu_online(task_cpu(p))))
-                set_task_cpu(p, smp_processor_id());
-
         /* CLONE_PARENT re-uses the old parent */
         if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
                 p->real_parent = current->real_parent;
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 50dbd5999588..8a5c7d55ac9f 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -243,38 +243,70 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
  * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
  * + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM
  */
-int reserve_bp_slot(struct perf_event *bp)
+static int __reserve_bp_slot(struct perf_event *bp)
 {
         struct bp_busy_slots slots = {0};
-        int ret = 0;
-
-        mutex_lock(&nr_bp_mutex);
 
         fetch_bp_busy_slots(&slots, bp);
 
         /* Flexible counters need to keep at least one slot */
-        if (slots.pinned + (!!slots.flexible) == HBP_NUM) {
-                ret = -ENOSPC;
-                goto end;
-        }
+        if (slots.pinned + (!!slots.flexible) == HBP_NUM)
+                return -ENOSPC;
 
         toggle_bp_slot(bp, true);
 
-end:
+        return 0;
+}
+
+int reserve_bp_slot(struct perf_event *bp)
+{
+        int ret;
+
+        mutex_lock(&nr_bp_mutex);
+
+        ret = __reserve_bp_slot(bp);
+
         mutex_unlock(&nr_bp_mutex);
 
         return ret;
 }
 
+static void __release_bp_slot(struct perf_event *bp)
+{
+        toggle_bp_slot(bp, false);
+}
+
 void release_bp_slot(struct perf_event *bp)
 {
         mutex_lock(&nr_bp_mutex);
 
-        toggle_bp_slot(bp, false);
+        __release_bp_slot(bp);
 
         mutex_unlock(&nr_bp_mutex);
 }
 
+/*
+ * Allow the kernel debugger to reserve breakpoint slots without
+ * taking a lock using the dbg_* variant of for the reserve and
+ * release breakpoint slots.
+ */
+int dbg_reserve_bp_slot(struct perf_event *bp)
+{
+        if (mutex_is_locked(&nr_bp_mutex))
+                return -1;
+
+        return __reserve_bp_slot(bp);
+}
+
+int dbg_release_bp_slot(struct perf_event *bp)
+{
+        if (mutex_is_locked(&nr_bp_mutex))
+                return -1;
+
+        __release_bp_slot(bp);
+
+        return 0;
+}
+
 
 int register_perf_hw_breakpoint(struct perf_event *bp)
 {
@@ -296,6 +328,10 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
         if (!bp->attr.disabled || !bp->overflow_handler)
                 ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
 
+        /* if arch_validate_hwbkpt_settings() fails then release bp slot */
+        if (ret)
+                release_bp_slot(bp);
+
         return ret;
 }
 
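The hw_breakpoint.c hunk above splits slot accounting into unlocked workers (__reserve_bp_slot()/__release_bp_slot()), locked wrappers for normal callers, and dbg_* variants that simply refuse when nr_bp_mutex is already held, because the kernel debugger runs with the other CPUs stopped and must never sleep on that mutex. Below is a minimal userspace sketch of the same locked-wrapper/unlocked-worker split; the names, the pthread mutex, and the use of trylock as a stand-in for mutex_is_locked() are illustrative assumptions, not the kernel API.

/* Userspace sketch (not the kernel API): one unlocked worker shared by a
 * sleeping wrapper and a "debugger" wrapper that backs off instead of
 * blocking. Build with: cc demo.c -pthread */
#include <pthread.h>
#include <stdio.h>

#define SLOT_MAX 4

static pthread_mutex_t slot_mutex = PTHREAD_MUTEX_INITIALIZER;
static int slots_used;

/* Worker: caller must hold slot_mutex, or know nobody else can run. */
static int __reserve_slot(void)
{
        if (slots_used == SLOT_MAX)
                return -1;              /* -ENOSPC in the kernel version */
        slots_used++;
        return 0;
}

/* Normal path: may sleep on the mutex. */
static int reserve_slot(void)
{
        int ret;

        pthread_mutex_lock(&slot_mutex);
        ret = __reserve_slot();
        pthread_mutex_unlock(&slot_mutex);
        return ret;
}

/* "Debugger" path: never blocks; trylock stands in for mutex_is_locked(). */
static int dbg_reserve_slot(void)
{
        int ret;

        if (pthread_mutex_trylock(&slot_mutex) != 0)
                return -1;              /* someone holds the lock: back off */
        ret = __reserve_slot();
        pthread_mutex_unlock(&slot_mutex);
        return ret;
}

int main(void)
{
        int a = reserve_slot();
        int b = dbg_reserve_slot();

        printf("normal: %d, debugger: %d, slots used: %d\n", a, b, slots_used);
        return 0;
}

The point of the split is that both paths reuse one worker, so the slot bookkeeping cannot diverge between the normal path and the debugger path.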
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 2eb517e23514..c7ade62e4ef0 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -583,6 +583,9 @@ static void kgdb_wait(struct pt_regs *regs)
         smp_wmb();
         atomic_set(&cpu_in_kgdb[cpu], 1);
 
+        /* Disable any cpu specific hw breakpoints */
+        kgdb_disable_hw_debug(regs);
+
         /* Wait till primary CPU is done with debugging */
         while (atomic_read(&passive_cpu_wait[cpu]))
                 cpu_relax();
diff --git a/kernel/sched.c b/kernel/sched.c
index 4508fe7048be..3a8fb30a91b1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2320,14 +2320,12 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 }
 
 /*
- * Called from:
+ * Gets called from 3 sites (exec, fork, wakeup), since it is called without
+ * holding rq->lock we need to ensure ->cpus_allowed is stable, this is done
+ * by:
  *
- * - fork, @p is stable because it isn't on the tasklist yet
- *
- * - exec, @p is unstable, retry loop
- *
- * - wake-up, we serialize ->cpus_allowed against TASK_WAKING so
- *   we should be good.
+ * exec: is unstable, retry loop
+ * fork & wake-up: serialize ->cpus_allowed against TASK_WAKING
  */
 static inline
 int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
@@ -2620,9 +2618,6 @@ void sched_fork(struct task_struct *p, int clone_flags)
         if (p->sched_class->task_fork)
                 p->sched_class->task_fork(p);
 
-#ifdef CONFIG_SMP
-        cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
-#endif
         set_task_cpu(p, cpu);
 
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
@@ -2652,6 +2647,21 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 {
         unsigned long flags;
         struct rq *rq;
+        int cpu = get_cpu();
+
+#ifdef CONFIG_SMP
+        /*
+         * Fork balancing, do it here and not earlier because:
+         * - cpus_allowed can change in the fork path
+         * - any previously selected cpu might disappear through hotplug
+         *
+         * We still have TASK_WAKING but PF_STARTING is gone now, meaning
+         * ->cpus_allowed is stable, we have preemption disabled, meaning
+         * cpu_online_mask is stable.
+         */
+        cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
+        set_task_cpu(p, cpu);
+#endif
 
         rq = task_rq_lock(p, &flags);
         BUG_ON(p->state != TASK_WAKING);
@@ -2665,6 +2675,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
                 p->sched_class->task_woken(rq, p);
 #endif
         task_rq_unlock(rq, &flags);
+        put_cpu();
 }
 
 #ifdef CONFIG_PREEMPT_NOTIFIERS
@@ -7139,14 +7150,18 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
          * the ->cpus_allowed mask from under waking tasks, which would be
          * possible when we change rq->lock in ttwu(), so synchronize against
          * TASK_WAKING to avoid that.
+         *
+         * Make an exception for freshly cloned tasks, since cpuset namespaces
+         * might move the task about, we have to validate the target in
+         * wake_up_new_task() anyway since the cpu might have gone away.
          */
 again:
-        while (p->state == TASK_WAKING)
+        while (p->state == TASK_WAKING && !(p->flags & PF_STARTING))
                 cpu_relax();
 
         rq = task_rq_lock(p, &flags);
 
-        if (p->state == TASK_WAKING) {
+        if (p->state == TASK_WAKING && !(p->flags & PF_STARTING)) {
                 task_rq_unlock(rq, &flags);
                 goto again;
         }
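The wake_up_new_task() change above deliberately defers select_task_rq() until ->cpus_allowed can no longer change, and wraps the selection in get_cpu()/put_cpu() so preemption stays off and cpu_online_mask cannot change while the target CPU is chosen. A rough userspace analogue of that rule, pick a target only inside a region where the candidate set is pinned, is sketched below; the rwlock standing in for preemption-disable versus hotplug, and every name in it, are illustrative assumptions rather than kernel code.

/* Sketch (not kernel code): choose a "CPU" only while the online set is
 * pinned. The read lock plays the role of get_cpu()/put_cpu(); the write
 * lock plays the role of CPU hotplug taking a core away. */
#include <pthread.h>
#include <stdio.h>

#define NR_CPUS 4

static pthread_rwlock_t online_lock = PTHREAD_RWLOCK_INITIALIZER;
static int cpu_online[NR_CPUS] = { 1, 1, 1, 1 };

static int pick_cpu(void)
{
        int cpu, target = -1;

        pthread_rwlock_rdlock(&online_lock);    /* "preemption off" */
        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                if (cpu_online[cpu]) {
                        target = cpu;
                        break;
                }
        }
        /* ...the task would be committed to 'target' before unlocking... */
        pthread_rwlock_unlock(&online_lock);    /* "preemption on" */
        return target;
}

static void offline_cpu(int cpu)
{
        pthread_rwlock_wrlock(&online_lock);    /* "hotplug" waits for pickers */
        cpu_online[cpu] = 0;
        pthread_rwlock_unlock(&online_lock);
}

int main(void)
{
        printf("picked cpu %d\n", pick_cpu());
        offline_cpu(0);
        printf("picked cpu %d\n", pick_cpu());
        return 0;
}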
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index e85c23404d34..13700833c181 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -343,7 +343,19 @@ static void clocksource_resume_watchdog(void)
 {
         unsigned long flags;
 
-        spin_lock_irqsave(&watchdog_lock, flags);
+        /*
+         * We use trylock here to avoid a potential dead lock when
+         * kgdb calls this code after the kernel has been stopped with
+         * watchdog_lock held. When watchdog_lock is held we just
+         * return and accept, that the watchdog might trigger and mark
+         * the monitored clock source (usually TSC) unstable.
+         *
+         * This does not affect the other caller clocksource_resume()
+         * because at this point the kernel is UP, interrupts are
+         * disabled and nothing can hold watchdog_lock.
+         */
+        if (!spin_trylock_irqsave(&watchdog_lock, flags))
+                return;
         clocksource_reset_watchdog();
         spin_unlock_irqrestore(&watchdog_lock, flags);
 }
@@ -458,8 +470,8 @@ void clocksource_resume(void)
  * clocksource_touch_watchdog - Update watchdog
  *
  * Update the watchdog after exception contexts such as kgdb so as not
- * to incorrectly trip the watchdog.
- *
+ * to incorrectly trip the watchdog. This might fail when the kernel
+ * was stopped in code which holds watchdog_lock.
  */
 void clocksource_touch_watchdog(void)
 {
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 6c22d8a2f289..60e2ce0181ee 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -27,9 +27,7 @@ config HAVE_FUNCTION_GRAPH_TRACER
 config HAVE_FUNCTION_GRAPH_FP_TEST
         bool
         help
-          An arch may pass in a unique value (frame pointer) to both the
-          entering and exiting of a function. On exit, the value is compared
-          and if it does not match, then it will panic the kernel.
+          See Documentation/trace/ftrace-design.txt
 
 config HAVE_FUNCTION_TRACE_MCOUNT_TEST
         bool
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index edefe3b2801b..8c1b2d290718 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -464,6 +464,8 @@ struct ring_buffer_iter {
         struct ring_buffer_per_cpu *cpu_buffer;
         unsigned long head;
         struct buffer_page *head_page;
+        struct buffer_page *cache_reader_page;
+        unsigned long cache_read;
         u64 read_stamp;
 };
 
@@ -2716,6 +2718,8 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
                 iter->read_stamp = cpu_buffer->read_stamp;
         else
                 iter->read_stamp = iter->head_page->page->time_stamp;
+        iter->cache_reader_page = cpu_buffer->reader_page;
+        iter->cache_read = cpu_buffer->read;
 }
 
 /**
@@ -3060,13 +3064,22 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
         struct ring_buffer_event *event;
         int nr_loops = 0;
 
-        if (ring_buffer_iter_empty(iter))
-                return NULL;
-
         cpu_buffer = iter->cpu_buffer;
         buffer = cpu_buffer->buffer;
 
+        /*
+         * Check if someone performed a consuming read to
+         * the buffer. A consuming read invalidates the iterator
+         * and we need to reset the iterator in this case.
+         */
+        if (unlikely(iter->cache_read != cpu_buffer->read ||
+                     iter->cache_reader_page != cpu_buffer->reader_page))
+                rb_iter_reset(iter);
+
 again:
+        if (ring_buffer_iter_empty(iter))
+                return NULL;
+
         /*
          * We repeat when a timestamp is encountered.
          * We can get multiple timestamps by nested interrupts or also
@@ -3081,6 +3094,11 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
         if (rb_per_cpu_empty(cpu_buffer))
                 return NULL;
 
+        if (iter->head >= local_read(&iter->head_page->page->commit)) {
+                rb_inc_iter(iter);
+                goto again;
+        }
+
         event = rb_iter_head_event(iter);
 
         switch (event->type_len) {
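The ring_buffer.c hunks above make the iterator snapshot cpu_buffer->read and the reader page when it is (re)initialised, and rb_iter_peek() resets the iterator as soon as either snapshot no longer matches, i.e. when a consuming read has invalidated it. The self-contained sketch below shows that invalidation-by-snapshot idea on a trivial array buffer; the types, the single counter, and the names are illustrative assumptions, not the ring_buffer.c structures.

/* Standalone sketch (not the ring_buffer.c types): an iterator remembers
 * how many consuming reads had happened when it was created and resets
 * itself once that snapshot goes stale. */
#include <stdio.h>

#define BUF_SIZE 8

struct buf {
        int data[BUF_SIZE];
        int head, tail;         /* consume from head, write at tail */
        unsigned long read;     /* counts consuming reads, like cpu_buffer->read */
};

struct iter {
        struct buf *b;
        int pos;
        unsigned long cache_read;       /* snapshot, like iter->cache_read */
};

static void iter_reset(struct iter *it)
{
        it->pos = it->b->head;
        it->cache_read = it->b->read;
}

static int iter_peek(struct iter *it, int *val)
{
        /* A consuming read invalidates the iterator: start over. */
        if (it->cache_read != it->b->read)
                iter_reset(it);
        if (it->pos == it->b->tail)
                return 0;               /* nothing left to look at */
        *val = it->b->data[it->pos];
        return 1;
}

static int consume(struct buf *b, int *val)
{
        if (b->head == b->tail)
                return 0;
        *val = b->data[b->head];
        b->head = (b->head + 1) % BUF_SIZE;
        b->read++;                      /* invalidates live iterators */
        return 1;
}

int main(void)
{
        struct buf b = { .data = { 1, 2, 3 }, .head = 0, .tail = 3 };
        struct iter it = { .b = &b };
        int v;

        iter_reset(&it);
        consume(&b, &v);        /* consuming read happens behind the iterator */
        if (iter_peek(&it, &v))
                printf("iterator resynced, now sees %d\n", v);  /* prints 2 */
        return 0;
}

The kernel version also compares the reader page, since a consuming read there swaps whole pages rather than advancing a simple index.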
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0df1b0f2cb9e..eac6875cb990 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -951,6 +951,11 @@ void trace_find_cmdline(int pid, char comm[])
                 return;
         }
 
+        if (WARN_ON_ONCE(pid < 0)) {
+                strcpy(comm, "<XXX>");
+                return;
+        }
+
         if (pid > PID_MAX_DEFAULT) {
                 strcpy(comm, "<...>");
                 return;