Diffstat (limited to 'kernel')
 35 files changed, 925 insertions(+), 700 deletions(-)
diff --git a/kernel/acct.c b/kernel/acct.c
index 9a4715a2f6bf..a6605ca921b6 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -536,7 +536,8 @@ static void do_acct_process(struct bsd_acct_struct *acct,
 	do_div(elapsed, AHZ);
 	ac.ac_btime = get_seconds() - elapsed;
 	/* we really need to bite the bullet and change layout */
-	current_uid_gid(&ac.ac_uid, &ac.ac_gid);
+	ac.ac_uid = orig_cred->uid;
+	ac.ac_gid = orig_cred->gid;
 #if ACCT_VERSION==2
 	ac.ac_ahz = AHZ;
 #endif
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 7c4e2713df0a..291ac586f37f 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -212,6 +212,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 	err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
 					hcpu, -1, &nr_calls);
 	if (err == NOTIFY_BAD) {
+		set_cpu_active(cpu, true);
+
 		nr_calls--;
 		__raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
 					  hcpu, nr_calls, NULL);
@@ -223,11 +225,11 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 
 	/* Ensure that we are not runnable on dying cpu */
 	cpumask_copy(old_allowed, &current->cpus_allowed);
-	set_cpus_allowed_ptr(current,
-			     cpumask_of(cpumask_any_but(cpu_online_mask, cpu)));
+	set_cpus_allowed_ptr(current, cpu_active_mask);
 
 	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
 	if (err) {
+		set_cpu_active(cpu, true);
 		/* CPU didn't die: tell everyone. Can't complain. */
 		if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
 					    hcpu) == NOTIFY_BAD)
@@ -292,9 +294,6 @@ int __ref cpu_down(unsigned int cpu)
 
 	err = _cpu_down(cpu, 0);
 
-	if (cpu_online(cpu))
-		set_cpu_active(cpu, true);
-
 out:
 	cpu_maps_update_done();
 	stop_machine_destroy();
@@ -387,6 +386,15 @@ int disable_nonboot_cpus(void)
 	 * with the userspace trying to use the CPU hotplug at the same time
 	 */
 	cpumask_clear(frozen_cpus);
+
+	for_each_online_cpu(cpu) {
+		if (cpu == first_cpu)
+			continue;
+		set_cpu_active(cpu, false);
+	}
+
+	synchronize_sched();
+
 	printk("Disabling non-boot CPUs ...\n");
 	for_each_online_cpu(cpu) {
 		if (cpu == first_cpu)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 3cf2183b472d..ba401fab459f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -737,7 +737,7 @@ static void do_rebuild_sched_domains(struct work_struct *unused)
 {
 }
 
-static int generate_sched_domains(struct cpumask **domains,
+static int generate_sched_domains(cpumask_var_t **domains,
 			struct sched_domain_attr **attributes)
 {
 	*domains = NULL;
@@ -872,7 +872,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 		if (retval < 0)
 			return retval;
 
-		if (!cpumask_subset(trialcs->cpus_allowed, cpu_online_mask))
+		if (!cpumask_subset(trialcs->cpus_allowed, cpu_active_mask))
 			return -EINVAL;
 	}
 	retval = validate_change(cs, trialcs);
@@ -2010,7 +2010,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
 		}
 
 		/* Continue past cpusets with all cpus, mems online */
-		if (cpumask_subset(cp->cpus_allowed, cpu_online_mask) &&
+		if (cpumask_subset(cp->cpus_allowed, cpu_active_mask) &&
 		    nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
 			continue;
 
@@ -2019,7 +2019,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
 		/* Remove offline cpus and mems from this cpuset. */
 		mutex_lock(&callback_mutex);
 		cpumask_and(cp->cpus_allowed, cp->cpus_allowed,
-			    cpu_online_mask);
+			    cpu_active_mask);
 		nodes_and(cp->mems_allowed, cp->mems_allowed,
 			  node_states[N_HIGH_MEMORY]);
 		mutex_unlock(&callback_mutex);
@@ -2057,8 +2057,10 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
 	switch (phase) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+	case CPU_DOWN_FAILED:
+	case CPU_DOWN_FAILED_FROZEN:
 		break;
 
 	default:
@@ -2067,7 +2069,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
 
 	cgroup_lock();
 	mutex_lock(&callback_mutex);
-	cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
+	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
 	mutex_unlock(&callback_mutex);
 	scan_for_empty_cpusets(&top_cpuset);
 	ndoms = generate_sched_domains(&doms, &attr);
@@ -2114,7 +2116,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
 
 void __init cpuset_init_smp(void)
 {
-	cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
+	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
 	top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
 
 	hotcpu_notifier(cpuset_track_online_cpus, 0);
diff --git a/kernel/exit.c b/kernel/exit.c
index 1143012951e9..6f50ef55a6f3 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -971,7 +971,7 @@ NORET_TYPE void do_exit(long code)
 	exit_thread();
 	cgroup_exit(tsk, 1);
 
-	if (group_dead && tsk->signal->leader)
+	if (group_dead)
 		disassociate_ctty(1);
 
 	module_put(task_thread_info(tsk)->exec_domain->module);
diff --git a/kernel/futex.c b/kernel/futex.c
index fb65e822fc41..d73ef1f3e55d 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -304,8 +304,14 @@ void put_futex_key(int fshared, union futex_key *key)
  */
 static int fault_in_user_writeable(u32 __user *uaddr)
 {
-	int ret = get_user_pages(current, current->mm, (unsigned long)uaddr,
-				 1, 1, 0, NULL, NULL);
+	struct mm_struct *mm = current->mm;
+	int ret;
+
+	down_read(&mm->mmap_sem);
+	ret = get_user_pages(current, mm, (unsigned long)uaddr,
+			     1, 1, 0, NULL, NULL);
+	up_read(&mm->mmap_sem);
+
 	return ret < 0 ? ret : 0;
 }
 
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index ede527708123..d2f9239dc6ba 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -557,7 +557,7 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
 static int hrtimer_reprogram(struct hrtimer *timer,
 			     struct hrtimer_clock_base *base)
 {
-	ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next;
+	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
 	ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
 	int res;
 
@@ -582,7 +582,16 @@ static int hrtimer_reprogram(struct hrtimer *timer,
 	if (expires.tv64 < 0)
 		return -ETIME;
 
-	if (expires.tv64 >= expires_next->tv64)
+	if (expires.tv64 >= cpu_base->expires_next.tv64)
+		return 0;
+
+	/*
+	 * If a hang was detected in the last timer interrupt then we
+	 * do not schedule a timer which is earlier than the expiry
+	 * which we enforced in the hang detection. We want the system
+	 * to make progress.
+	 */
+	if (cpu_base->hang_detected)
 		return 0;
 
 	/*
@@ -590,7 +599,7 @@ static int hrtimer_reprogram(struct hrtimer *timer,
 	 */
 	res = tick_program_event(expires, 0);
 	if (!IS_ERR_VALUE(res))
-		*expires_next = expires;
+		cpu_base->expires_next = expires;
 	return res;
 }
 
@@ -747,17 +756,33 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
 
 #endif /* CONFIG_HIGH_RES_TIMERS */
 
-#ifdef CONFIG_TIMER_STATS
-void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr)
+static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
 {
+#ifdef CONFIG_TIMER_STATS
 	if (timer->start_site)
 		return;
-
-	timer->start_site = addr;
+	timer->start_site = __builtin_return_address(0);
 	memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
 	timer->start_pid = current->pid;
+#endif
 }
+
+static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
+{
+#ifdef CONFIG_TIMER_STATS
+	timer->start_site = NULL;
+#endif
+}
+
+static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
+{
+#ifdef CONFIG_TIMER_STATS
+	if (likely(!timer_stats_active))
+		return;
+	timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
+				 timer->function, timer->start_comm, 0);
 #endif
+}
 
 /*
  * Counterpart to lock_hrtimer_base above:
@@ -1217,30 +1242,6 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
 
 #ifdef CONFIG_HIGH_RES_TIMERS
 
-static int force_clock_reprogram;
-
-/*
- * After 5 iteration's attempts, we consider that hrtimer_interrupt()
- * is hanging, which could happen with something that slows the interrupt
- * such as the tracing. Then we force the clock reprogramming for each future
- * hrtimer interrupts to avoid infinite loops and use the min_delta_ns
- * threshold that we will overwrite.
- * The next tick event will be scheduled to 3 times we currently spend on
- * hrtimer_interrupt(). This gives a good compromise, the cpus will spend
- * 1/4 of their time to process the hrtimer interrupts. This is enough to
- * let it running without serious starvation.
- */
-
-static inline void
-hrtimer_interrupt_hanging(struct clock_event_device *dev,
-			ktime_t try_time)
-{
-	force_clock_reprogram = 1;
-	dev->min_delta_ns = (unsigned long)try_time.tv64 * 3;
-	printk(KERN_WARNING "hrtimer: interrupt too slow, "
-		"forcing clock min delta to %llu ns\n",
-		(unsigned long long) dev->min_delta_ns);
-}
 /*
  * High resolution timer interrupt
  * Called with interrupts disabled
@@ -1249,21 +1250,15 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 {
 	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
 	struct hrtimer_clock_base *base;
-	ktime_t expires_next, now;
-	int nr_retries = 0;
-	int i;
+	ktime_t expires_next, now, entry_time, delta;
+	int i, retries = 0;
 
 	BUG_ON(!cpu_base->hres_active);
 	cpu_base->nr_events++;
 	dev->next_event.tv64 = KTIME_MAX;
 
-retry:
-	/* 5 retries is enough to notice a hang */
-	if (!(++nr_retries % 5))
-		hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now));
-
-	now = ktime_get();
-
+	entry_time = now = ktime_get();
+retry:
 	expires_next.tv64 = KTIME_MAX;
 
 	spin_lock(&cpu_base->lock);
@@ -1325,10 +1320,48 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 	spin_unlock(&cpu_base->lock);
 
 	/* Reprogramming necessary ? */
-	if (expires_next.tv64 != KTIME_MAX) {
-		if (tick_program_event(expires_next, force_clock_reprogram))
-			goto retry;
+	if (expires_next.tv64 == KTIME_MAX ||
+	    !tick_program_event(expires_next, 0)) {
+		cpu_base->hang_detected = 0;
+		return;
 	}
+
+	/*
+	 * The next timer was already expired due to:
+	 * - tracing
+	 * - long lasting callbacks
+	 * - being scheduled away when running in a VM
+	 *
+	 * We need to prevent that we loop forever in the hrtimer
+	 * interrupt routine. We give it 3 attempts to avoid
+	 * overreacting on some spurious event.
+	 */
+	now = ktime_get();
+	cpu_base->nr_retries++;
+	if (++retries < 3)
+		goto retry;
+	/*
+	 * Give the system a chance to do something else than looping
+	 * here. We stored the entry time, so we know exactly how long
+	 * we spent here. We schedule the next event this amount of
+	 * time away.
+	 */
+	cpu_base->nr_hangs++;
+	cpu_base->hang_detected = 1;
+	delta = ktime_sub(now, entry_time);
+	if (delta.tv64 > cpu_base->max_hang_time.tv64)
+		cpu_base->max_hang_time = delta;
+	/*
+	 * Limit it to a sensible value as we enforce a longer
+	 * delay. Give the CPU at least 100ms to catch up.
+	 */
+	if (delta.tv64 > 100 * NSEC_PER_MSEC)
+		expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
+	else
+		expires_next = ktime_add(now, delta);
+	tick_program_event(expires_next, 1);
+	printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
+		    ktime_to_ns(delta));
 }
 
 /*
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index cf5ee1628411..366eedf949c0 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -52,7 +52,7 @@
 static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned);
 
 /* Number of pinned task breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, task_bp_pinned[HBP_NUM]);
+static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[HBP_NUM]);
 
 /* Number of non-pinned cpu/task breakpoints in a cpu */
 static DEFINE_PER_CPU(unsigned int, nr_bp_flexible);
@@ -73,7 +73,7 @@ static DEFINE_MUTEX(nr_bp_mutex);
 static unsigned int max_task_bp_pinned(int cpu)
 {
 	int i;
-	unsigned int *tsk_pinned = per_cpu(task_bp_pinned, cpu);
+	unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
 
 	for (i = HBP_NUM -1; i >= 0; i--) {
 		if (tsk_pinned[i] > 0)
@@ -83,15 +83,51 @@ static unsigned int max_task_bp_pinned(int cpu)
 	return 0;
 }
 
+static int task_bp_pinned(struct task_struct *tsk)
+{
+	struct perf_event_context *ctx = tsk->perf_event_ctxp;
+	struct list_head *list;
+	struct perf_event *bp;
+	unsigned long flags;
+	int count = 0;
+
+	if (WARN_ONCE(!ctx, "No perf context for this task"))
+		return 0;
+
+	list = &ctx->event_list;
+
+	spin_lock_irqsave(&ctx->lock, flags);
+
+	/*
+	 * The current breakpoint counter is not included in the list
+	 * at the open() callback time
+	 */
+	list_for_each_entry(bp, list, event_entry) {
+		if (bp->attr.type == PERF_TYPE_BREAKPOINT)
+			count++;
+	}
+
+	spin_unlock_irqrestore(&ctx->lock, flags);
+
+	return count;
+}
+
 /*
  * Report the number of pinned/un-pinned breakpoints we have in
  * a given cpu (cpu > -1) or in all of them (cpu = -1).
  */
-static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
+static void
+fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
 {
+	int cpu = bp->cpu;
+	struct task_struct *tsk = bp->ctx->task;
+
 	if (cpu >= 0) {
 		slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
-		slots->pinned += max_task_bp_pinned(cpu);
+		if (!tsk)
+			slots->pinned += max_task_bp_pinned(cpu);
+		else
+			slots->pinned += task_bp_pinned(tsk);
 		slots->flexible = per_cpu(nr_bp_flexible, cpu);
 
 		return;
@@ -101,7 +137,10 @@ static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
 		unsigned int nr;
 
 		nr = per_cpu(nr_cpu_bp_pinned, cpu);
-		nr += max_task_bp_pinned(cpu);
+		if (!tsk)
+			nr += max_task_bp_pinned(cpu);
+		else
+			nr += task_bp_pinned(tsk);
 
 		if (nr > slots->pinned)
 			slots->pinned = nr;
@@ -118,35 +157,12 @@ static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
  */
 static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
 {
-	int count = 0;
-	struct perf_event *bp;
-	struct perf_event_context *ctx = tsk->perf_event_ctxp;
 	unsigned int *tsk_pinned;
-	struct list_head *list;
-	unsigned long flags;
-
-	if (WARN_ONCE(!ctx, "No perf context for this task"))
-		return;
-
-	list = &ctx->event_list;
-
-	spin_lock_irqsave(&ctx->lock, flags);
-
-	/*
-	 * The current breakpoint counter is not included in the list
-	 * at the open() callback time
-	 */
-	list_for_each_entry(bp, list, event_entry) {
-		if (bp->attr.type == PERF_TYPE_BREAKPOINT)
-			count++;
-	}
+	int count = 0;
 
-	spin_unlock_irqrestore(&ctx->lock, flags);
+	count = task_bp_pinned(tsk);
 
-	if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list"))
-		return;
-
-	tsk_pinned = per_cpu(task_bp_pinned, cpu);
+	tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
 	if (enable) {
 		tsk_pinned[count]++;
 		if (count > 0)
@@ -193,7 +209,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
  *   - If attached to a single cpu, check:
  *
  *       (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu)
- *           + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM
+ *           + max(per_cpu(nr_task_bp_pinned, cpu)))) < HBP_NUM
 *
 * -> If there are already non-pinned counters in this cpu, it means
 *    there is already a free slot for them.
@@ -204,7 +220,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
 *   - If attached to every cpus, check:
 *
 *       (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *))
- *           + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM
+ *           + max(per_cpu(nr_task_bp_pinned, *)))) < HBP_NUM
 *
 * -> This is roughly the same, except we check the number of per cpu
 *    bp for every cpu and we keep the max one. Same for the per tasks
@@ -216,7 +232,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
 *   - If attached to a single cpu, check:
 *
 *       ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu)
- *           + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM
+ *           + max(per_cpu(nr_task_bp_pinned, cpu))) < HBP_NUM
 *
 * -> Same checks as before. But now the nr_bp_flexible, if any, must keep
 *    one register at least (or they will never be fed).
@@ -224,7 +240,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
 *   - If attached to every cpus, check:
 *
 *       ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
- *           + max(per_cpu(task_bp_pinned, *))) < HBP_NUM
+ *           + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM
 */
 int reserve_bp_slot(struct perf_event *bp)
 {
@@ -233,7 +249,7 @@ int reserve_bp_slot(struct perf_event *bp)
 
 	mutex_lock(&nr_bp_mutex);
 
-	fetch_bp_busy_slots(&slots, bp->cpu);
+	fetch_bp_busy_slots(&slots, bp);
 
 	/* Flexible counters need to keep at least one slot */
 	if (slots.pinned + (!!slots.flexible) == HBP_NUM) {
@@ -259,7 +275,7 @@ void release_bp_slot(struct perf_event *bp)
 }
 
 
-int __register_perf_hw_breakpoint(struct perf_event *bp)
+int register_perf_hw_breakpoint(struct perf_event *bp)
 {
 	int ret;
 
@@ -276,19 +292,12 @@ int __register_perf_hw_breakpoint(struct perf_event *bp)
 	 * This is a quick hack that will be removed soon, once we remove
 	 * the tmp breakpoints from ptrace
 	 */
-	if (!bp->attr.disabled || bp->callback == perf_bp_event)
+	if (!bp->attr.disabled || !bp->overflow_handler)
 		ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
 
 	return ret;
 }
 
-int register_perf_hw_breakpoint(struct perf_event *bp)
-{
-	bp->callback = perf_bp_event;
-
-	return __register_perf_hw_breakpoint(bp);
-}
-
 /**
  * register_user_hw_breakpoint - register a hardware breakpoint for user space
  * @attr: breakpoint attributes
@@ -297,7 +306,7 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
  */
 struct perf_event *
 register_user_hw_breakpoint(struct perf_event_attr *attr,
-			    perf_callback_t triggered,
+			    perf_overflow_handler_t triggered,
 			    struct task_struct *tsk)
 {
 	return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
@@ -311,19 +320,40 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
 * @triggered: callback to trigger when we hit the breakpoint
 * @tsk: pointer to 'task_struct' of the process to which the address belongs
 */
-struct perf_event *
-modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr,
-			  perf_callback_t triggered,
-			  struct task_struct *tsk)
+int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
 {
-	/*
-	 * FIXME: do it without unregistering
-	 * - We don't want to lose our slot
-	 * - If the new bp is incorrect, don't lose the older one
-	 */
-	unregister_hw_breakpoint(bp);
+	u64 old_addr = bp->attr.bp_addr;
+	int old_type = bp->attr.bp_type;
+	int old_len = bp->attr.bp_len;
+	int err = 0;
 
-	return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
+	perf_event_disable(bp);
+
+	bp->attr.bp_addr = attr->bp_addr;
+	bp->attr.bp_type = attr->bp_type;
+	bp->attr.bp_len = attr->bp_len;
+
+	if (attr->disabled)
+		goto end;
+
+	err = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
+	if (!err)
+		perf_event_enable(bp);
+
+	if (err) {
+		bp->attr.bp_addr = old_addr;
+		bp->attr.bp_type = old_type;
+		bp->attr.bp_len = old_len;
+		if (!bp->attr.disabled)
+			perf_event_enable(bp);
+
+		return err;
+	}
+
+end:
+	bp->attr.disabled = attr->disabled;
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
 
@@ -348,7 +378,7 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
 */
 struct perf_event **
 register_wide_hw_breakpoint(struct perf_event_attr *attr,
-			    perf_callback_t triggered)
+			    perf_overflow_handler_t triggered)
 {
 	struct perf_event **cpu_events, **pevent, *bp;
 	long err;
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 7d7014634022..2eb517e23514 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -129,6 +129,7 @@ struct task_struct *kgdb_usethread;
 struct task_struct *kgdb_contthread;
 
 int kgdb_single_step;
+pid_t kgdb_sstep_pid;
 
 /* Our I/O buffers. */
 static char remcom_in_buffer[BUFMAX];
@@ -541,12 +542,17 @@ static struct task_struct *getthread(struct pt_regs *regs, int tid)
 	 */
 	if (tid == 0 || tid == -1)
 		tid = -atomic_read(&kgdb_active) - 2;
-	if (tid < 0) {
+	if (tid < -1 && tid > -NR_CPUS - 2) {
 		if (kgdb_info[-tid - 2].task)
 			return kgdb_info[-tid - 2].task;
 		else
 			return idle_task(-tid - 2);
 	}
+	if (tid <= 0) {
+		printk(KERN_ERR "KGDB: Internal thread select error\n");
+		dump_stack();
+		return NULL;
+	}
 
 	/*
 	 * find_task_by_pid_ns() does not take the tasklist lock anymore
@@ -619,7 +625,8 @@ static void kgdb_flush_swbreak_addr(unsigned long addr)
 static int kgdb_activate_sw_breakpoints(void)
 {
 	unsigned long addr;
-	int error = 0;
+	int error;
+	int ret = 0;
 	int i;
 
 	for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
@@ -629,13 +636,16 @@ static int kgdb_activate_sw_breakpoints(void)
 		addr = kgdb_break[i].bpt_addr;
 		error = kgdb_arch_set_breakpoint(addr,
 				kgdb_break[i].saved_instr);
-		if (error)
-			return error;
+		if (error) {
+			ret = error;
+			printk(KERN_INFO "KGDB: BP install failed: %lx", addr);
+			continue;
+		}
 
 		kgdb_flush_swbreak_addr(addr);
 		kgdb_break[i].state = BP_ACTIVE;
 	}
-	return 0;
+	return ret;
 }
 
 static int kgdb_set_sw_break(unsigned long addr)
@@ -682,7 +692,8 @@ static int kgdb_set_sw_break(unsigned long addr)
 static int kgdb_deactivate_sw_breakpoints(void)
 {
 	unsigned long addr;
-	int error = 0;
+	int error;
+	int ret = 0;
 	int i;
 
 	for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
@@ -691,13 +702,15 @@ static int kgdb_deactivate_sw_breakpoints(void)
 		addr = kgdb_break[i].bpt_addr;
 		error = kgdb_arch_remove_breakpoint(addr,
 					kgdb_break[i].saved_instr);
-		if (error)
-			return error;
+		if (error) {
+			printk(KERN_INFO "KGDB: BP remove failed: %lx\n", addr);
+			ret = error;
+		}
 
 		kgdb_flush_swbreak_addr(addr);
 		kgdb_break[i].state = BP_SET;
 	}
-	return 0;
+	return ret;
 }
 
 static int kgdb_remove_sw_break(unsigned long addr)
@@ -1204,8 +1217,10 @@ static int gdb_cmd_exception_pass(struct kgdb_state *ks)
 		return 1;
 
 	} else {
-		error_packet(remcom_out_buffer, -EINVAL);
-		return 0;
+		kgdb_msg_write("KGDB only knows signal 9 (pass)"
+			" and 15 (pass and disconnect)\n"
+			"Executing a continue without signal passing\n", 0);
+		remcom_in_buffer[0] = 'c';
 	}
 
 	/* Indicate fall through */
@@ -1395,6 +1410,7 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
 	struct kgdb_state kgdb_var;
 	struct kgdb_state *ks = &kgdb_var;
 	unsigned long flags;
+	int sstep_tries = 100;
 	int error = 0;
 	int i, cpu;
 
@@ -1425,13 +1441,14 @@ acquirelock:
 		cpu_relax();
 
 	/*
-	 * Do not start the debugger connection on this CPU if the last
-	 * instance of the exception handler wanted to come into the
-	 * debugger on a different CPU via a single step
+	 * For single stepping, try to only enter on the processor
+	 * that was single stepping. To gaurd against a deadlock, the
+	 * kernel will only try for the value of sstep_tries before
+	 * giving up and continuing on.
	 */
 	if (atomic_read(&kgdb_cpu_doing_single_step) != -1 &&
-	    atomic_read(&kgdb_cpu_doing_single_step) != cpu) {
-
+	    (kgdb_info[cpu].task &&
+	     kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) {
 		atomic_set(&kgdb_active, -1);
 		touch_softlockup_watchdog();
 		clocksource_touch_watchdog();
@@ -1524,6 +1541,13 @@ acquirelock:
 	}
 
 kgdb_restore:
+	if (atomic_read(&kgdb_cpu_doing_single_step) != -1) {
+		int sstep_cpu = atomic_read(&kgdb_cpu_doing_single_step);
+		if (kgdb_info[sstep_cpu].task)
+			kgdb_sstep_pid = kgdb_info[sstep_cpu].task->pid;
+		else
+			kgdb_sstep_pid = 0;
+	}
 	/* Free kgdb_active */
 	atomic_set(&kgdb_active, -1);
 	touch_softlockup_watchdog();
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index f5dcd36d3151..429540c70d3f 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -140,7 +140,8 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock)
 }
 
 #ifdef CONFIG_LOCK_STAT
-static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats);
+static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS],
+		      cpu_lock_stats);
 
 static inline u64 lockstat_clock(void)
 {
@@ -168,7 +169,7 @@ static void lock_time_inc(struct lock_time *lt, u64 time)
 	if (time > lt->max)
 		lt->max = time;
 
-	if (time < lt->min || !lt->min)
+	if (time < lt->min || !lt->nr)
 		lt->min = time;
 
 	lt->total += time;
@@ -177,8 +178,15 @@ static void lock_time_inc(struct lock_time *lt, u64 time)
 
 static inline void lock_time_add(struct lock_time *src, struct lock_time *dst)
 {
-	dst->min += src->min;
-	dst->max += src->max;
+	if (!src->nr)
+		return;
+
+	if (src->max > dst->max)
+		dst->max = src->max;
+
+	if (src->min < dst->min || !dst->nr)
+		dst->min = src->min;
+
 	dst->total += src->total;
 	dst->nr += src->nr;
 }
@@ -191,7 +199,7 @@ struct lock_class_stats lock_stats(struct lock_class *class)
 	memset(&stats, 0, sizeof(struct lock_class_stats));
 	for_each_possible_cpu(cpu) {
 		struct lock_class_stats *pcs =
-			&per_cpu(lock_stats, cpu)[class - lock_classes];
+			&per_cpu(cpu_lock_stats, cpu)[class - lock_classes];
 
 		for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++)
 			stats.contention_point[i] += pcs->contention_point[i];
@@ -218,7 +226,7 @@ void clear_lock_stats(struct lock_class *class)
 
 	for_each_possible_cpu(cpu) {
 		struct lock_class_stats *cpu_stats =
-			&per_cpu(lock_stats, cpu)[class - lock_classes];
+			&per_cpu(cpu_lock_stats, cpu)[class - lock_classes];
 
 		memset(cpu_stats, 0, sizeof(struct lock_class_stats));
 	}
@@ -228,12 +236,12 @@ void clear_lock_stats(struct lock_class *class)
 
 static struct lock_class_stats *get_lock_stats(struct lock_class *class)
 {
-	return &get_cpu_var(lock_stats)[class - lock_classes];
+	return &get_cpu_var(cpu_lock_stats)[class - lock_classes];
 }
 
 static void put_lock_stats(struct lock_class_stats *stats)
 {
-	put_cpu_var(lock_stats);
+	put_cpu_var(cpu_lock_stats);
 }
 
 static void lock_release_holdtime(struct held_lock *hlock)
@@ -379,7 +387,8 @@ static int save_trace(struct stack_trace *trace)
 	 * complete trace that maxes out the entries provided will be reported
 	 * as incomplete, friggin useless </rant>
 	 */
-	if (trace->entries[trace->nr_entries-1] == ULONG_MAX)
+	if (trace->nr_entries != 0 &&
+	    trace->entries[trace->nr_entries-1] == ULONG_MAX)
 		trace->nr_entries--;
 
 	trace->max_entries = trace->nr_entries;
diff --git a/kernel/module.c b/kernel/module.c
index 5842a71cf052..12afc5a3ddd3 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -370,8 +370,6 @@ EXPORT_SYMBOL_GPL(find_module);
 
 #ifdef CONFIG_SMP
 
-#ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA
-
 static void *percpu_modalloc(unsigned long size, unsigned long align,
 			     const char *name)
 {
@@ -395,154 +393,6 @@ static void percpu_modfree(void *freeme)
 	free_percpu(freeme);
 }
 
-#else /* ... CONFIG_HAVE_LEGACY_PER_CPU_AREA */
-
-/* Number of blocks used and allocated. */
-static unsigned int pcpu_num_used, pcpu_num_allocated;
-/* Size of each block. -ve means used. */
-static int *pcpu_size;
-
-static int split_block(unsigned int i, unsigned short size)
-{
-	/* Reallocation required? */
-	if (pcpu_num_used + 1 > pcpu_num_allocated) {
-		int *new;
-
-		new = krealloc(pcpu_size, sizeof(new[0])*pcpu_num_allocated*2,
-			       GFP_KERNEL);
-		if (!new)
-			return 0;
-
-		pcpu_num_allocated *= 2;
-		pcpu_size = new;
-	}
-
-	/* Insert a new subblock */
-	memmove(&pcpu_size[i+1], &pcpu_size[i],
-		sizeof(pcpu_size[0]) * (pcpu_num_used - i));
-	pcpu_num_used++;
-
-	pcpu_size[i+1] -= size;
-	pcpu_size[i] = size;
-	return 1;
-}
-
-static inline unsigned int block_size(int val)
-{
-	if (val < 0)
-		return -val;
-	return val;
-}
-
-static void *percpu_modalloc(unsigned long size, unsigned long align,
-			     const char *name)
-{
-	unsigned long extra;
-	unsigned int i;
-	void *ptr;
-	int cpu;
-
-	if (align > PAGE_SIZE) {
-		printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n",
-		       name, align, PAGE_SIZE);
-		align = PAGE_SIZE;
-	}
-
-	ptr = __per_cpu_start;
-	for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
-		/* Extra for alignment requirement. */
-		extra = ALIGN((unsigned long)ptr, align) - (unsigned long)ptr;
-		BUG_ON(i == 0 && extra != 0);
-
-		if (pcpu_size[i] < 0 || pcpu_size[i] < extra + size)
-			continue;
-
-		/* Transfer extra to previous block. */
-		if (pcpu_size[i-1] < 0)
-			pcpu_size[i-1] -= extra;
-		else
-			pcpu_size[i-1] += extra;
-		pcpu_size[i] -= extra;
-		ptr += extra;
-
-		/* Split block if warranted */
-		if (pcpu_size[i] - size > sizeof(unsigned long))
-			if (!split_block(i, size))
-				return NULL;
-
-		/* add the per-cpu scanning areas */
-		for_each_possible_cpu(cpu)
-			kmemleak_alloc(ptr + per_cpu_offset(cpu), size, 0,
-				       GFP_KERNEL);
-
-		/* Mark allocated */
-		pcpu_size[i] = -pcpu_size[i];
-		return ptr;
-	}
-
-	printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n",
-	       size);
-	return NULL;
-}
-
-static void percpu_modfree(void *freeme)
-{
-	unsigned int i;
-	void *ptr = __per_cpu_start + block_size(pcpu_size[0]);
-	int cpu;
-
-	/* First entry is core kernel percpu data. */
-	for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
-		if (ptr == freeme) {
-			pcpu_size[i] = -pcpu_size[i];
-			goto free;
-		}
-	}
-	BUG();
-
- free:
-	/* remove the per-cpu scanning areas */
-	for_each_possible_cpu(cpu)
-		kmemleak_free(freeme + per_cpu_offset(cpu));
-
-	/* Merge with previous? */
-	if (pcpu_size[i-1] >= 0) {
-		pcpu_size[i-1] += pcpu_size[i];
-		pcpu_num_used--;
-		memmove(&pcpu_size[i], &pcpu_size[i+1],
-			(pcpu_num_used - i) * sizeof(pcpu_size[0]));
-		i--;
-	}
-	/* Merge with next? */
-	if (i+1 < pcpu_num_used && pcpu_size[i+1] >= 0) {
-		pcpu_size[i] += pcpu_size[i+1];
-		pcpu_num_used--;
-		memmove(&pcpu_size[i+1], &pcpu_size[i+2],
-			(pcpu_num_used - (i+1)) * sizeof(pcpu_size[0]));
-	}
-}
-
-static int percpu_modinit(void)
-{
-	pcpu_num_used = 2;
-	pcpu_num_allocated = 2;
-	pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated,
-			    GFP_KERNEL);
-	/* Static in-kernel percpu data (used). */
-	pcpu_size[0] = -(__per_cpu_end-__per_cpu_start);
-	/* Free room. */
-	pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0];
-	if (pcpu_size[1] < 0) {
-		printk(KERN_ERR "No per-cpu room for modules.\n");
-		pcpu_num_used = 1;
-	}
-
-	return 0;
-}
-__initcall(percpu_modinit);
-
-#endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */
-
 static unsigned int find_pcpusec(Elf_Ehdr *hdr,
 				 Elf_Shdr *sechdrs,
 				 const char *secstrings)
diff --git a/kernel/params.c b/kernel/params.c
index d656c276508d..cf1b69183127 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -24,6 +24,7 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/ctype.h>
+#include <linux/string.h>
 
 #if 0
 #define DEBUGP printk
@@ -122,9 +123,7 @@ static char *next_arg(char *args, char **param, char **val)
 	next = args + i;
 
 	/* Chew up trailing spaces. */
-	while (isspace(*next))
-		next++;
-	return next;
+	return skip_spaces(next);
 }
 
 /* Args looks like "foo=bar,bar2 baz=fuz wiz". */
@@ -139,8 +138,7 @@ int parse_args(const char *name,
 	DEBUGP("Parsing ARGS: %s\n", args);
 
 	/* Chew leading spaces */
-	while (isspace(*args))
-		args++;
+	args = skip_spaces(args);
 
 	while (*args) {
 		int ret;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 40a996ec39fa..e73e53c7582f 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
| @@ -36,7 +36,7 @@ | |||
| 36 | /* | 36 | /* |
| 37 | * Each CPU has a list of per CPU events: | 37 | * Each CPU has a list of per CPU events: |
| 38 | */ | 38 | */ |
| 39 | DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); | 39 | static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); |
| 40 | 40 | ||
| 41 | int perf_max_events __read_mostly = 1; | 41 | int perf_max_events __read_mostly = 1; |
| 42 | static int perf_reserved_percpu __read_mostly; | 42 | static int perf_reserved_percpu __read_mostly; |
| @@ -567,7 +567,7 @@ static void __perf_event_disable(void *info) | |||
| 567 | * is the current context on this CPU and preemption is disabled, | 567 | * is the current context on this CPU and preemption is disabled, |
| 568 | * hence we can't get into perf_event_task_sched_out for this context. | 568 | * hence we can't get into perf_event_task_sched_out for this context. |
| 569 | */ | 569 | */ |
| 570 | static void perf_event_disable(struct perf_event *event) | 570 | void perf_event_disable(struct perf_event *event) |
| 571 | { | 571 | { |
| 572 | struct perf_event_context *ctx = event->ctx; | 572 | struct perf_event_context *ctx = event->ctx; |
| 573 | struct task_struct *task = ctx->task; | 573 | struct task_struct *task = ctx->task; |
| @@ -971,7 +971,7 @@ static void __perf_event_enable(void *info) | |||
| 971 | * perf_event_for_each_child or perf_event_for_each as described | 971 | * perf_event_for_each_child or perf_event_for_each as described |
| 972 | * for perf_event_disable. | 972 | * for perf_event_disable. |
| 973 | */ | 973 | */ |
| 974 | static void perf_event_enable(struct perf_event *event) | 974 | void perf_event_enable(struct perf_event *event) |
| 975 | { | 975 | { |
| 976 | struct perf_event_context *ctx = event->ctx; | 976 | struct perf_event_context *ctx = event->ctx; |
| 977 | struct task_struct *task = ctx->task; | 977 | struct task_struct *task = ctx->task; |
| @@ -1579,7 +1579,6 @@ static void | |||
| 1579 | __perf_event_init_context(struct perf_event_context *ctx, | 1579 | __perf_event_init_context(struct perf_event_context *ctx, |
| 1580 | struct task_struct *task) | 1580 | struct task_struct *task) |
| 1581 | { | 1581 | { |
| 1582 | memset(ctx, 0, sizeof(*ctx)); | ||
| 1583 | spin_lock_init(&ctx->lock); | 1582 | spin_lock_init(&ctx->lock); |
| 1584 | mutex_init(&ctx->mutex); | 1583 | mutex_init(&ctx->mutex); |
| 1585 | INIT_LIST_HEAD(&ctx->group_list); | 1584 | INIT_LIST_HEAD(&ctx->group_list); |
| @@ -1654,7 +1653,7 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu) | |||
| 1654 | } | 1653 | } |
| 1655 | 1654 | ||
| 1656 | if (!ctx) { | 1655 | if (!ctx) { |
| 1657 | ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL); | 1656 | ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL); |
| 1658 | err = -ENOMEM; | 1657 | err = -ENOMEM; |
| 1659 | if (!ctx) | 1658 | if (!ctx) |
| 1660 | goto errout; | 1659 | goto errout; |
| @@ -4011,6 +4010,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) | |||
| 4011 | event->pmu->read(event); | 4010 | event->pmu->read(event); |
| 4012 | 4011 | ||
| 4013 | data.addr = 0; | 4012 | data.addr = 0; |
| 4013 | data.raw = NULL; | ||
| 4014 | data.period = event->hw.last_period; | 4014 | data.period = event->hw.last_period; |
| 4015 | regs = get_irq_regs(); | 4015 | regs = get_irq_regs(); |
| 4016 | /* | 4016 | /* |
| @@ -4080,8 +4080,7 @@ static void cpu_clock_perf_event_update(struct perf_event *event) | |||
| 4080 | u64 now; | 4080 | u64 now; |
| 4081 | 4081 | ||
| 4082 | now = cpu_clock(cpu); | 4082 | now = cpu_clock(cpu); |
| 4083 | prev = atomic64_read(&event->hw.prev_count); | 4083 | prev = atomic64_xchg(&event->hw.prev_count, now); |
| 4084 | atomic64_set(&event->hw.prev_count, now); | ||
| 4085 | atomic64_add(now - prev, &event->count); | 4084 | atomic64_add(now - prev, &event->count); |
| 4086 | } | 4085 | } |
| 4087 | 4086 | ||
| @@ -4286,15 +4285,8 @@ static void bp_perf_event_destroy(struct perf_event *event) | |||
| 4286 | static const struct pmu *bp_perf_event_init(struct perf_event *bp) | 4285 | static const struct pmu *bp_perf_event_init(struct perf_event *bp) |
| 4287 | { | 4286 | { |
| 4288 | int err; | 4287 | int err; |
| 4289 | /* | 4288 | |
| 4290 | * The breakpoint is already filled if we haven't created the counter | 4289 | err = register_perf_hw_breakpoint(bp); |
| 4291 | * through perf syscall | ||
| 4292 | * FIXME: manage to get triggered to NULL if it comes from syscalls | ||
| 4293 | */ | ||
| 4294 | if (!bp->callback) | ||
| 4295 | err = register_perf_hw_breakpoint(bp); | ||
| 4296 | else | ||
| 4297 | err = __register_perf_hw_breakpoint(bp); | ||
| 4298 | if (err) | 4290 | if (err) |
| 4299 | return ERR_PTR(err); | 4291 | return ERR_PTR(err); |
| 4300 | 4292 | ||
| @@ -4308,6 +4300,7 @@ void perf_bp_event(struct perf_event *bp, void *data) | |||
| 4308 | struct perf_sample_data sample; | 4300 | struct perf_sample_data sample; |
| 4309 | struct pt_regs *regs = data; | 4301 | struct pt_regs *regs = data; |
| 4310 | 4302 | ||
| 4303 | sample.raw = NULL; | ||
| 4311 | sample.addr = bp->attr.bp_addr; | 4304 | sample.addr = bp->attr.bp_addr; |
| 4312 | 4305 | ||
| 4313 | if (!perf_exclude_event(bp, regs)) | 4306 | if (!perf_exclude_event(bp, regs)) |
| @@ -4390,7 +4383,7 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
| 4390 | struct perf_event_context *ctx, | 4383 | struct perf_event_context *ctx, |
| 4391 | struct perf_event *group_leader, | 4384 | struct perf_event *group_leader, |
| 4392 | struct perf_event *parent_event, | 4385 | struct perf_event *parent_event, |
| 4393 | perf_callback_t callback, | 4386 | perf_overflow_handler_t overflow_handler, |
| 4394 | gfp_t gfpflags) | 4387 | gfp_t gfpflags) |
| 4395 | { | 4388 | { |
| 4396 | const struct pmu *pmu; | 4389 | const struct pmu *pmu; |
| @@ -4433,10 +4426,10 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
| 4433 | 4426 | ||
| 4434 | event->state = PERF_EVENT_STATE_INACTIVE; | 4427 | event->state = PERF_EVENT_STATE_INACTIVE; |
| 4435 | 4428 | ||
| 4436 | if (!callback && parent_event) | 4429 | if (!overflow_handler && parent_event) |
| 4437 | callback = parent_event->callback; | 4430 | overflow_handler = parent_event->overflow_handler; |
| 4438 | 4431 | ||
| 4439 | event->callback = callback; | 4432 | event->overflow_handler = overflow_handler; |
| 4440 | 4433 | ||
| 4441 | if (attr->disabled) | 4434 | if (attr->disabled) |
| 4442 | event->state = PERF_EVENT_STATE_OFF; | 4435 | event->state = PERF_EVENT_STATE_OFF; |
| @@ -4776,7 +4769,8 @@ err_put_context: | |||
| 4776 | */ | 4769 | */ |
| 4777 | struct perf_event * | 4770 | struct perf_event * |
| 4778 | perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | 4771 | perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, |
| 4779 | pid_t pid, perf_callback_t callback) | 4772 | pid_t pid, |
| 4773 | perf_overflow_handler_t overflow_handler) | ||
| 4780 | { | 4774 | { |
| 4781 | struct perf_event *event; | 4775 | struct perf_event *event; |
| 4782 | struct perf_event_context *ctx; | 4776 | struct perf_event_context *ctx; |
| @@ -4793,7 +4787,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | |||
| 4793 | } | 4787 | } |
| 4794 | 4788 | ||
| 4795 | event = perf_event_alloc(attr, cpu, ctx, NULL, | 4789 | event = perf_event_alloc(attr, cpu, ctx, NULL, |
| 4796 | NULL, callback, GFP_KERNEL); | 4790 | NULL, overflow_handler, GFP_KERNEL); |
| 4797 | if (IS_ERR(event)) { | 4791 | if (IS_ERR(event)) { |
| 4798 | err = PTR_ERR(event); | 4792 | err = PTR_ERR(event); |
| 4799 | goto err_put_context; | 4793 | goto err_put_context; |
| @@ -5090,7 +5084,7 @@ again: | |||
| 5090 | */ | 5084 | */ |
| 5091 | int perf_event_init_task(struct task_struct *child) | 5085 | int perf_event_init_task(struct task_struct *child) |
| 5092 | { | 5086 | { |
| 5093 | struct perf_event_context *child_ctx, *parent_ctx; | 5087 | struct perf_event_context *child_ctx = NULL, *parent_ctx; |
| 5094 | struct perf_event_context *cloned_ctx; | 5088 | struct perf_event_context *cloned_ctx; |
| 5095 | struct perf_event *event; | 5089 | struct perf_event *event; |
| 5096 | struct task_struct *parent = current; | 5090 | struct task_struct *parent = current; |
| @@ -5106,20 +5100,6 @@ int perf_event_init_task(struct task_struct *child) | |||
| 5106 | return 0; | 5100 | return 0; |
| 5107 | 5101 | ||
| 5108 | /* | 5102 | /* |
| 5109 | * This is executed from the parent task context, so inherit | ||
| 5110 | * events that have been marked for cloning. | ||
| 5111 | * First allocate and initialize a context for the child. | ||
| 5112 | */ | ||
| 5113 | |||
| 5114 | child_ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL); | ||
| 5115 | if (!child_ctx) | ||
| 5116 | return -ENOMEM; | ||
| 5117 | |||
| 5118 | __perf_event_init_context(child_ctx, child); | ||
| 5119 | child->perf_event_ctxp = child_ctx; | ||
| 5120 | get_task_struct(child); | ||
| 5121 | |||
| 5122 | /* | ||
| 5123 | * If the parent's context is a clone, pin it so it won't get | 5103 | * If the parent's context is a clone, pin it so it won't get |
| 5124 | * swapped under us. | 5104 | * swapped under us. |
| 5125 | */ | 5105 | */ |
| @@ -5149,6 +5129,26 @@ int perf_event_init_task(struct task_struct *child) | |||
| 5149 | continue; | 5129 | continue; |
| 5150 | } | 5130 | } |
| 5151 | 5131 | ||
| 5132 | if (!child->perf_event_ctxp) { | ||
| 5133 | /* | ||
| 5134 | * This is executed from the parent task context, so | ||
| 5135 | * inherit events that have been marked for cloning. | ||
| 5136 | * First allocate and initialize a context for the | ||
| 5137 | * child. | ||
| 5138 | */ | ||
| 5139 | |||
| 5140 | child_ctx = kzalloc(sizeof(struct perf_event_context), | ||
| 5141 | GFP_KERNEL); | ||
| 5142 | if (!child_ctx) { | ||
| 5143 | ret = -ENOMEM; | ||
| 5144 | goto exit; | ||
| 5145 | } | ||
| 5146 | |||
| 5147 | __perf_event_init_context(child_ctx, child); | ||
| 5148 | child->perf_event_ctxp = child_ctx; | ||
| 5149 | get_task_struct(child); | ||
| 5150 | } | ||
| 5151 | |||
| 5152 | ret = inherit_group(event, parent, parent_ctx, | 5152 | ret = inherit_group(event, parent, parent_ctx, |
| 5153 | child, child_ctx); | 5153 | child, child_ctx); |
| 5154 | if (ret) { | 5154 | if (ret) { |
| @@ -5177,6 +5177,7 @@ int perf_event_init_task(struct task_struct *child) | |||
| 5177 | get_ctx(child_ctx->parent_ctx); | 5177 | get_ctx(child_ctx->parent_ctx); |
| 5178 | } | 5178 | } |
| 5179 | 5179 | ||
| 5180 | exit: | ||
| 5180 | mutex_unlock(&parent_ctx->mutex); | 5181 | mutex_unlock(&parent_ctx->mutex); |
| 5181 | 5182 | ||
| 5182 | perf_unpin_context(parent_ctx); | 5183 | perf_unpin_context(parent_ctx); |
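The perf_event.c hunks above switch the context allocation to kzalloc(), rename the generic callback to overflow_handler, and collapse the prev_count read/set pair into a single atomic64_xchg(). A minimal sketch of that last pattern follows; hwc and count are illustrative stand-ins for event->hw and event->count and are not part of the patch.

#include <linux/perf_event.h>

/*
 * atomic64_xchg() stores the new timestamp and returns the old one in a
 * single atomic operation, so no concurrent update of prev_count can be
 * lost between a separate read and set.
 */
static void clock_event_update(struct hw_perf_event *hwc, atomic64_t *count, u64 now)
{
        u64 prev = atomic64_xchg(&hwc->prev_count, now); /* swap in the new timestamp */

        atomic64_add(now - prev, count);                 /* account the elapsed delta */
}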
diff --git a/kernel/power/console.c b/kernel/power/console.c index 5187136fe1de..218e5af90156 100644 --- a/kernel/power/console.c +++ b/kernel/power/console.c | |||
| @@ -6,7 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <linux/vt_kern.h> | 7 | #include <linux/vt_kern.h> |
| 8 | #include <linux/kbd_kern.h> | 8 | #include <linux/kbd_kern.h> |
| 9 | #include <linux/console.h> | 9 | #include <linux/vt.h> |
| 10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
| 11 | #include "power.h" | 11 | #include "power.h" |
| 12 | 12 | ||
| @@ -21,8 +21,7 @@ int pm_prepare_console(void) | |||
| 21 | if (orig_fgconsole < 0) | 21 | if (orig_fgconsole < 0) |
| 22 | return 1; | 22 | return 1; |
| 23 | 23 | ||
| 24 | orig_kmsg = kmsg_redirect; | 24 | orig_kmsg = vt_kmsg_redirect(SUSPEND_CONSOLE); |
| 25 | kmsg_redirect = SUSPEND_CONSOLE; | ||
| 26 | return 0; | 25 | return 0; |
| 27 | } | 26 | } |
| 28 | 27 | ||
| @@ -30,7 +29,7 @@ void pm_restore_console(void) | |||
| 30 | { | 29 | { |
| 31 | if (orig_fgconsole >= 0) { | 30 | if (orig_fgconsole >= 0) { |
| 32 | vt_move_to_console(orig_fgconsole, 0); | 31 | vt_move_to_console(orig_fgconsole, 0); |
| 33 | kmsg_redirect = orig_kmsg; | 32 | vt_kmsg_redirect(orig_kmsg); |
| 34 | } | 33 | } |
| 35 | } | 34 | } |
| 36 | #endif | 35 | #endif |
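pm_prepare_console()/pm_restore_console() now go through vt_kmsg_redirect(), which installs a new redirect target for kernel messages and hands back the previous one. A hedged sketch of that save/restore idiom, assuming the declaration from <linux/vt_kern.h>; the wrapper functions and saved_kmsg are illustrative, not from the patch.

#include <linux/vt_kern.h>

static int saved_kmsg;                  /* plays the role of orig_kmsg above */

static void redirect_kmsg_for_suspend(int vt)
{
        /* point kernel messages at vt and remember where they went before */
        saved_kmsg = vt_kmsg_redirect(vt);
}

static void restore_kmsg_after_resume(void)
{
        /* put the redirect back the way we found it */
        vt_kmsg_redirect(saved_kmsg);
}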
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index a621a67ef4e3..9bb52177af02 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
| @@ -763,13 +763,13 @@ static void rcu_torture_timer(unsigned long unused) | |||
| 763 | /* Should not happen, but... */ | 763 | /* Should not happen, but... */ |
| 764 | pipe_count = RCU_TORTURE_PIPE_LEN; | 764 | pipe_count = RCU_TORTURE_PIPE_LEN; |
| 765 | } | 765 | } |
| 766 | ++__get_cpu_var(rcu_torture_count)[pipe_count]; | 766 | __this_cpu_inc(per_cpu_var(rcu_torture_count)[pipe_count]); |
| 767 | completed = cur_ops->completed() - completed; | 767 | completed = cur_ops->completed() - completed; |
| 768 | if (completed > RCU_TORTURE_PIPE_LEN) { | 768 | if (completed > RCU_TORTURE_PIPE_LEN) { |
| 769 | /* Should not happen, but... */ | 769 | /* Should not happen, but... */ |
| 770 | completed = RCU_TORTURE_PIPE_LEN; | 770 | completed = RCU_TORTURE_PIPE_LEN; |
| 771 | } | 771 | } |
| 772 | ++__get_cpu_var(rcu_torture_batch)[completed]; | 772 | __this_cpu_inc(per_cpu_var(rcu_torture_batch)[completed]); |
| 773 | preempt_enable(); | 773 | preempt_enable(); |
| 774 | cur_ops->readunlock(idx); | 774 | cur_ops->readunlock(idx); |
| 775 | } | 775 | } |
| @@ -818,13 +818,13 @@ rcu_torture_reader(void *arg) | |||
| 818 | /* Should not happen, but... */ | 818 | /* Should not happen, but... */ |
| 819 | pipe_count = RCU_TORTURE_PIPE_LEN; | 819 | pipe_count = RCU_TORTURE_PIPE_LEN; |
| 820 | } | 820 | } |
| 821 | ++__get_cpu_var(rcu_torture_count)[pipe_count]; | 821 | __this_cpu_inc(per_cpu_var(rcu_torture_count)[pipe_count]); |
| 822 | completed = cur_ops->completed() - completed; | 822 | completed = cur_ops->completed() - completed; |
| 823 | if (completed > RCU_TORTURE_PIPE_LEN) { | 823 | if (completed > RCU_TORTURE_PIPE_LEN) { |
| 824 | /* Should not happen, but... */ | 824 | /* Should not happen, but... */ |
| 825 | completed = RCU_TORTURE_PIPE_LEN; | 825 | completed = RCU_TORTURE_PIPE_LEN; |
| 826 | } | 826 | } |
| 827 | ++__get_cpu_var(rcu_torture_batch)[completed]; | 827 | __this_cpu_inc(per_cpu_var(rcu_torture_batch)[completed]); |
| 828 | preempt_enable(); | 828 | preempt_enable(); |
| 829 | cur_ops->readunlock(idx); | 829 | cur_ops->readunlock(idx); |
| 830 | schedule(); | 830 | schedule(); |
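The rcutorture hunks replace open-coded per-CPU increments with __this_cpu_inc(). A minimal sketch of the two forms on an illustrative per-CPU array; my_histogram is hypothetical, and per_cpu_var() is the symbol-name wrapper this tree still expects for this_cpu operations.

#include <linux/percpu.h>

static DEFINE_PER_CPU(long [4], my_histogram);  /* hypothetical, mirrors rcu_torture_count */

static void bump_old(int bucket)
{
        /* pre-patch style: take this CPU's copy, then increment it */
        preempt_disable();
        ++__get_cpu_var(my_histogram)[bucket];
        preempt_enable();
}

static void bump_new(int bucket)
{
        /* post-patch style: one this_cpu read-modify-write on the same slot;
         * the rcutorture callers above still run under preempt_disable() */
        __this_cpu_inc(per_cpu_var(my_histogram)[bucket]);
}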
diff --git a/kernel/resource.c b/kernel/resource.c index fb11a58b9594..dc15686b7a77 100644 --- a/kernel/resource.c +++ b/kernel/resource.c | |||
| @@ -308,35 +308,37 @@ static int find_resource(struct resource *root, struct resource *new, | |||
| 308 | void *alignf_data) | 308 | void *alignf_data) |
| 309 | { | 309 | { |
| 310 | struct resource *this = root->child; | 310 | struct resource *this = root->child; |
| 311 | resource_size_t start, end; | ||
| 311 | 312 | ||
| 312 | new->start = root->start; | 313 | start = root->start; |
| 313 | /* | 314 | /* |
| 314 | * Skip past an allocated resource that starts at 0, since the assignment | 315 | * Skip past an allocated resource that starts at 0, since the assignment |
| 315 | * of this->start - 1 to new->end below would cause an underflow. | 316 | * of this->start - 1 to new->end below would cause an underflow. |
| 316 | */ | 317 | */ |
| 317 | if (this && this->start == 0) { | 318 | if (this && this->start == 0) { |
| 318 | new->start = this->end + 1; | 319 | start = this->end + 1; |
| 319 | this = this->sibling; | 320 | this = this->sibling; |
| 320 | } | 321 | } |
| 321 | for(;;) { | 322 | for(;;) { |
| 322 | if (this) | 323 | if (this) |
| 323 | new->end = this->start - 1; | 324 | end = this->start - 1; |
| 324 | else | 325 | else |
| 325 | new->end = root->end; | 326 | end = root->end; |
| 326 | if (new->start < min) | 327 | if (start < min) |
| 327 | new->start = min; | 328 | start = min; |
| 328 | if (new->end > max) | 329 | if (end > max) |
| 329 | new->end = max; | 330 | end = max; |
| 330 | new->start = ALIGN(new->start, align); | 331 | start = ALIGN(start, align); |
| 331 | if (alignf) | 332 | if (alignf) |
| 332 | alignf(alignf_data, new, size, align); | 333 | alignf(alignf_data, new, size, align); |
| 333 | if (new->start < new->end && new->end - new->start >= size - 1) { | 334 | if (start < end && end - start >= size - 1) { |
| 334 | new->end = new->start + size - 1; | 335 | new->start = start; |
| 336 | new->end = start + size - 1; | ||
| 335 | return 0; | 337 | return 0; |
| 336 | } | 338 | } |
| 337 | if (!this) | 339 | if (!this) |
| 338 | break; | 340 | break; |
| 339 | new->start = this->end + 1; | 341 | start = this->end + 1; |
| 340 | this = this->sibling; | 342 | this = this->sibling; |
| 341 | } | 343 | } |
| 342 | return -EBUSY; | 344 | return -EBUSY; |
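find_resource() now does its searching in local start/end variables and writes the caller's struct resource only once a window of the requested size is found, so a failed search no longer leaves *new half-updated. A self-contained sketch of that idiom; the types and names are illustrative, not kernel API.

struct range { unsigned long start, end; };

/* search with temporaries, commit the result only on success */
static int pick_window(struct range *out, unsigned long lo, unsigned long hi,
                       unsigned long size)
{
        unsigned long start = lo, end = hi;

        if (start < end && end - start >= size - 1) {
                out->start = start;              /* commit */
                out->end   = start + size - 1;
                return 0;
        }
        return -1;                               /* *out is untouched on failure */
}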
diff --git a/kernel/sched.c b/kernel/sched.c index e7f2cfa6a257..fd05861b2111 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -298,7 +298,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct cfs_rq, init_tg_cfs_rq); | |||
| 298 | 298 | ||
| 299 | #ifdef CONFIG_RT_GROUP_SCHED | 299 | #ifdef CONFIG_RT_GROUP_SCHED |
| 300 | static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); | 300 | static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); |
| 301 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq); | 301 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq_var); |
| 302 | #endif /* CONFIG_RT_GROUP_SCHED */ | 302 | #endif /* CONFIG_RT_GROUP_SCHED */ |
| 303 | #else /* !CONFIG_USER_SCHED */ | 303 | #else /* !CONFIG_USER_SCHED */ |
| 304 | #define root_task_group init_task_group | 304 | #define root_task_group init_task_group |
| @@ -814,6 +814,7 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32; | |||
| 814 | * default: 0.25ms | 814 | * default: 0.25ms |
| 815 | */ | 815 | */ |
| 816 | unsigned int sysctl_sched_shares_ratelimit = 250000; | 816 | unsigned int sysctl_sched_shares_ratelimit = 250000; |
| 817 | unsigned int normalized_sysctl_sched_shares_ratelimit = 250000; | ||
| 817 | 818 | ||
| 818 | /* | 819 | /* |
| 819 | * Inject some fuzzyness into changing the per-cpu group shares | 820 | * Inject some fuzzyness into changing the per-cpu group shares |
| @@ -1614,7 +1615,7 @@ static void update_group_shares_cpu(struct task_group *tg, int cpu, | |||
| 1614 | */ | 1615 | */ |
| 1615 | static int tg_shares_up(struct task_group *tg, void *data) | 1616 | static int tg_shares_up(struct task_group *tg, void *data) |
| 1616 | { | 1617 | { |
| 1617 | unsigned long weight, rq_weight = 0, shares = 0; | 1618 | unsigned long weight, rq_weight = 0, sum_weight = 0, shares = 0; |
| 1618 | unsigned long *usd_rq_weight; | 1619 | unsigned long *usd_rq_weight; |
| 1619 | struct sched_domain *sd = data; | 1620 | struct sched_domain *sd = data; |
| 1620 | unsigned long flags; | 1621 | unsigned long flags; |
| @@ -1630,6 +1631,7 @@ static int tg_shares_up(struct task_group *tg, void *data) | |||
| 1630 | weight = tg->cfs_rq[i]->load.weight; | 1631 | weight = tg->cfs_rq[i]->load.weight; |
| 1631 | usd_rq_weight[i] = weight; | 1632 | usd_rq_weight[i] = weight; |
| 1632 | 1633 | ||
| 1634 | rq_weight += weight; | ||
| 1633 | /* | 1635 | /* |
| 1634 | * If there are currently no tasks on the cpu pretend there | 1636 | * If there are currently no tasks on the cpu pretend there |
| 1635 | * is one of average load so that when a new task gets to | 1637 | * is one of average load so that when a new task gets to |
| @@ -1638,10 +1640,13 @@ static int tg_shares_up(struct task_group *tg, void *data) | |||
| 1638 | if (!weight) | 1640 | if (!weight) |
| 1639 | weight = NICE_0_LOAD; | 1641 | weight = NICE_0_LOAD; |
| 1640 | 1642 | ||
| 1641 | rq_weight += weight; | 1643 | sum_weight += weight; |
| 1642 | shares += tg->cfs_rq[i]->shares; | 1644 | shares += tg->cfs_rq[i]->shares; |
| 1643 | } | 1645 | } |
| 1644 | 1646 | ||
| 1647 | if (!rq_weight) | ||
| 1648 | rq_weight = sum_weight; | ||
| 1649 | |||
| 1645 | if ((!shares && rq_weight) || shares > tg->shares) | 1650 | if ((!shares && rq_weight) || shares > tg->shares) |
| 1646 | shares = tg->shares; | 1651 | shares = tg->shares; |
| 1647 | 1652 | ||
| @@ -1810,6 +1815,22 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) | |||
| 1810 | #endif | 1815 | #endif |
| 1811 | 1816 | ||
| 1812 | static void calc_load_account_active(struct rq *this_rq); | 1817 | static void calc_load_account_active(struct rq *this_rq); |
| 1818 | static void update_sysctl(void); | ||
| 1819 | static int get_update_sysctl_factor(void); | ||
| 1820 | |||
| 1821 | static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | ||
| 1822 | { | ||
| 1823 | set_task_rq(p, cpu); | ||
| 1824 | #ifdef CONFIG_SMP | ||
| 1825 | /* | ||
| 1826 | * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be | ||
| 1827 | * successfully executed on another CPU. We must ensure that updates of | ||
| 1828 | * per-task data have been completed by this moment. | ||
| 1829 | */ | ||
| 1830 | smp_wmb(); | ||
| 1831 | task_thread_info(p)->cpu = cpu; | ||
| 1832 | #endif | ||
| 1833 | } | ||
| 1813 | 1834 | ||
| 1814 | #include "sched_stats.h" | 1835 | #include "sched_stats.h" |
| 1815 | #include "sched_idletask.c" | 1836 | #include "sched_idletask.c" |
| @@ -1967,20 +1988,6 @@ inline int task_curr(const struct task_struct *p) | |||
| 1967 | return cpu_curr(task_cpu(p)) == p; | 1988 | return cpu_curr(task_cpu(p)) == p; |
| 1968 | } | 1989 | } |
| 1969 | 1990 | ||
| 1970 | static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | ||
| 1971 | { | ||
| 1972 | set_task_rq(p, cpu); | ||
| 1973 | #ifdef CONFIG_SMP | ||
| 1974 | /* | ||
| 1975 | * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be | ||
| 1976 | * successfully executed on another CPU. We must ensure that updates of | ||
| 1977 | * per-task data have been completed by this moment. | ||
| 1978 | */ | ||
| 1979 | smp_wmb(); | ||
| 1980 | task_thread_info(p)->cpu = cpu; | ||
| 1981 | #endif | ||
| 1982 | } | ||
| 1983 | |||
| 1984 | static inline void check_class_changed(struct rq *rq, struct task_struct *p, | 1991 | static inline void check_class_changed(struct rq *rq, struct task_struct *p, |
| 1985 | const struct sched_class *prev_class, | 1992 | const struct sched_class *prev_class, |
| 1986 | int oldprio, int running) | 1993 | int oldprio, int running) |
| @@ -2060,29 +2067,13 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) | |||
| 2060 | void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | 2067 | void set_task_cpu(struct task_struct *p, unsigned int new_cpu) |
| 2061 | { | 2068 | { |
| 2062 | int old_cpu = task_cpu(p); | 2069 | int old_cpu = task_cpu(p); |
| 2063 | struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu); | ||
| 2064 | struct cfs_rq *old_cfsrq = task_cfs_rq(p), | 2070 | struct cfs_rq *old_cfsrq = task_cfs_rq(p), |
| 2065 | *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); | 2071 | *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); |
| 2066 | u64 clock_offset; | ||
| 2067 | |||
| 2068 | clock_offset = old_rq->clock - new_rq->clock; | ||
| 2069 | 2072 | ||
| 2070 | trace_sched_migrate_task(p, new_cpu); | 2073 | trace_sched_migrate_task(p, new_cpu); |
| 2071 | 2074 | ||
| 2072 | #ifdef CONFIG_SCHEDSTATS | ||
| 2073 | if (p->se.wait_start) | ||
| 2074 | p->se.wait_start -= clock_offset; | ||
| 2075 | if (p->se.sleep_start) | ||
| 2076 | p->se.sleep_start -= clock_offset; | ||
| 2077 | if (p->se.block_start) | ||
| 2078 | p->se.block_start -= clock_offset; | ||
| 2079 | #endif | ||
| 2080 | if (old_cpu != new_cpu) { | 2075 | if (old_cpu != new_cpu) { |
| 2081 | p->se.nr_migrations++; | 2076 | p->se.nr_migrations++; |
| 2082 | #ifdef CONFIG_SCHEDSTATS | ||
| 2083 | if (task_hot(p, old_rq->clock, NULL)) | ||
| 2084 | schedstat_inc(p, se.nr_forced2_migrations); | ||
| 2085 | #endif | ||
| 2086 | perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, | 2077 | perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, |
| 2087 | 1, 1, NULL, 0); | 2078 | 1, 1, NULL, 0); |
| 2088 | } | 2079 | } |
| @@ -2323,6 +2314,14 @@ void task_oncpu_function_call(struct task_struct *p, | |||
| 2323 | preempt_enable(); | 2314 | preempt_enable(); |
| 2324 | } | 2315 | } |
| 2325 | 2316 | ||
| 2317 | #ifdef CONFIG_SMP | ||
| 2318 | static inline | ||
| 2319 | int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) | ||
| 2320 | { | ||
| 2321 | return p->sched_class->select_task_rq(p, sd_flags, wake_flags); | ||
| 2322 | } | ||
| 2323 | #endif | ||
| 2324 | |||
| 2326 | /*** | 2325 | /*** |
| 2327 | * try_to_wake_up - wake up a thread | 2326 | * try_to_wake_up - wake up a thread |
| 2328 | * @p: the to-be-woken-up thread | 2327 | * @p: the to-be-woken-up thread |
| @@ -2374,17 +2373,14 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, | |||
| 2374 | if (task_contributes_to_load(p)) | 2373 | if (task_contributes_to_load(p)) |
| 2375 | rq->nr_uninterruptible--; | 2374 | rq->nr_uninterruptible--; |
| 2376 | p->state = TASK_WAKING; | 2375 | p->state = TASK_WAKING; |
| 2377 | task_rq_unlock(rq, &flags); | 2376 | __task_rq_unlock(rq); |
| 2378 | 2377 | ||
| 2379 | cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags); | 2378 | cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); |
| 2380 | if (cpu != orig_cpu) { | 2379 | if (cpu != orig_cpu) |
| 2381 | local_irq_save(flags); | ||
| 2382 | rq = cpu_rq(cpu); | ||
| 2383 | update_rq_clock(rq); | ||
| 2384 | set_task_cpu(p, cpu); | 2380 | set_task_cpu(p, cpu); |
| 2385 | local_irq_restore(flags); | 2381 | |
| 2386 | } | 2382 | rq = __task_rq_lock(p); |
| 2387 | rq = task_rq_lock(p, &flags); | 2383 | update_rq_clock(rq); |
| 2388 | 2384 | ||
| 2389 | WARN_ON(p->state != TASK_WAKING); | 2385 | WARN_ON(p->state != TASK_WAKING); |
| 2390 | cpu = task_cpu(p); | 2386 | cpu = task_cpu(p); |
| @@ -2499,7 +2495,6 @@ static void __sched_fork(struct task_struct *p) | |||
| 2499 | p->se.avg_overlap = 0; | 2495 | p->se.avg_overlap = 0; |
| 2500 | p->se.start_runtime = 0; | 2496 | p->se.start_runtime = 0; |
| 2501 | p->se.avg_wakeup = sysctl_sched_wakeup_granularity; | 2497 | p->se.avg_wakeup = sysctl_sched_wakeup_granularity; |
| 2502 | p->se.avg_running = 0; | ||
| 2503 | 2498 | ||
| 2504 | #ifdef CONFIG_SCHEDSTATS | 2499 | #ifdef CONFIG_SCHEDSTATS |
| 2505 | p->se.wait_start = 0; | 2500 | p->se.wait_start = 0; |
| @@ -2521,7 +2516,6 @@ static void __sched_fork(struct task_struct *p) | |||
| 2521 | p->se.nr_failed_migrations_running = 0; | 2516 | p->se.nr_failed_migrations_running = 0; |
| 2522 | p->se.nr_failed_migrations_hot = 0; | 2517 | p->se.nr_failed_migrations_hot = 0; |
| 2523 | p->se.nr_forced_migrations = 0; | 2518 | p->se.nr_forced_migrations = 0; |
| 2524 | p->se.nr_forced2_migrations = 0; | ||
| 2525 | 2519 | ||
| 2526 | p->se.nr_wakeups = 0; | 2520 | p->se.nr_wakeups = 0; |
| 2527 | p->se.nr_wakeups_sync = 0; | 2521 | p->se.nr_wakeups_sync = 0; |
| @@ -2558,7 +2552,6 @@ static void __sched_fork(struct task_struct *p) | |||
| 2558 | void sched_fork(struct task_struct *p, int clone_flags) | 2552 | void sched_fork(struct task_struct *p, int clone_flags) |
| 2559 | { | 2553 | { |
| 2560 | int cpu = get_cpu(); | 2554 | int cpu = get_cpu(); |
| 2561 | unsigned long flags; | ||
| 2562 | 2555 | ||
| 2563 | __sched_fork(p); | 2556 | __sched_fork(p); |
| 2564 | 2557 | ||
| @@ -2592,13 +2585,13 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
| 2592 | if (!rt_prio(p->prio)) | 2585 | if (!rt_prio(p->prio)) |
| 2593 | p->sched_class = &fair_sched_class; | 2586 | p->sched_class = &fair_sched_class; |
| 2594 | 2587 | ||
| 2588 | if (p->sched_class->task_fork) | ||
| 2589 | p->sched_class->task_fork(p); | ||
| 2590 | |||
| 2595 | #ifdef CONFIG_SMP | 2591 | #ifdef CONFIG_SMP |
| 2596 | cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0); | 2592 | cpu = select_task_rq(p, SD_BALANCE_FORK, 0); |
| 2597 | #endif | 2593 | #endif |
| 2598 | local_irq_save(flags); | ||
| 2599 | update_rq_clock(cpu_rq(cpu)); | ||
| 2600 | set_task_cpu(p, cpu); | 2594 | set_task_cpu(p, cpu); |
| 2601 | local_irq_restore(flags); | ||
| 2602 | 2595 | ||
| 2603 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | 2596 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
| 2604 | if (likely(sched_info_on())) | 2597 | if (likely(sched_info_on())) |
| @@ -2631,17 +2624,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
| 2631 | rq = task_rq_lock(p, &flags); | 2624 | rq = task_rq_lock(p, &flags); |
| 2632 | BUG_ON(p->state != TASK_RUNNING); | 2625 | BUG_ON(p->state != TASK_RUNNING); |
| 2633 | update_rq_clock(rq); | 2626 | update_rq_clock(rq); |
| 2634 | 2627 | activate_task(rq, p, 0); | |
| 2635 | if (!p->sched_class->task_new || !current->se.on_rq) { | ||
| 2636 | activate_task(rq, p, 0); | ||
| 2637 | } else { | ||
| 2638 | /* | ||
| 2639 | * Let the scheduling class do new task startup | ||
| 2640 | * management (if any): | ||
| 2641 | */ | ||
| 2642 | p->sched_class->task_new(rq, p); | ||
| 2643 | inc_nr_running(rq); | ||
| 2644 | } | ||
| 2645 | trace_sched_wakeup_new(rq, p, 1); | 2628 | trace_sched_wakeup_new(rq, p, 1); |
| 2646 | check_preempt_curr(rq, p, WF_FORK); | 2629 | check_preempt_curr(rq, p, WF_FORK); |
| 2647 | #ifdef CONFIG_SMP | 2630 | #ifdef CONFIG_SMP |
| @@ -3156,7 +3139,7 @@ out: | |||
| 3156 | void sched_exec(void) | 3139 | void sched_exec(void) |
| 3157 | { | 3140 | { |
| 3158 | int new_cpu, this_cpu = get_cpu(); | 3141 | int new_cpu, this_cpu = get_cpu(); |
| 3159 | new_cpu = current->sched_class->select_task_rq(current, SD_BALANCE_EXEC, 0); | 3142 | new_cpu = select_task_rq(current, SD_BALANCE_EXEC, 0); |
| 3160 | put_cpu(); | 3143 | put_cpu(); |
| 3161 | if (new_cpu != this_cpu) | 3144 | if (new_cpu != this_cpu) |
| 3162 | sched_migrate_task(current, new_cpu); | 3145 | sched_migrate_task(current, new_cpu); |
| @@ -3172,10 +3155,6 @@ static void pull_task(struct rq *src_rq, struct task_struct *p, | |||
| 3172 | deactivate_task(src_rq, p, 0); | 3155 | deactivate_task(src_rq, p, 0); |
| 3173 | set_task_cpu(p, this_cpu); | 3156 | set_task_cpu(p, this_cpu); |
| 3174 | activate_task(this_rq, p, 0); | 3157 | activate_task(this_rq, p, 0); |
| 3175 | /* | ||
| 3176 | * Note that idle threads have a prio of MAX_PRIO, for this test | ||
| 3177 | * to be always true for them. | ||
| 3178 | */ | ||
| 3179 | check_preempt_curr(this_rq, p, 0); | 3158 | check_preempt_curr(this_rq, p, 0); |
| 3180 | } | 3159 | } |
| 3181 | 3160 | ||
| @@ -4134,7 +4113,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
| 4134 | unsigned long flags; | 4113 | unsigned long flags; |
| 4135 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); | 4114 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); |
| 4136 | 4115 | ||
| 4137 | cpumask_copy(cpus, cpu_online_mask); | 4116 | cpumask_copy(cpus, cpu_active_mask); |
| 4138 | 4117 | ||
| 4139 | /* | 4118 | /* |
| 4140 | * When power savings policy is enabled for the parent domain, idle | 4119 | * When power savings policy is enabled for the parent domain, idle |
| @@ -4297,7 +4276,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) | |||
| 4297 | int all_pinned = 0; | 4276 | int all_pinned = 0; |
| 4298 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); | 4277 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); |
| 4299 | 4278 | ||
| 4300 | cpumask_copy(cpus, cpu_online_mask); | 4279 | cpumask_copy(cpus, cpu_active_mask); |
| 4301 | 4280 | ||
| 4302 | /* | 4281 | /* |
| 4303 | * When power savings policy is enabled for the parent domain, idle | 4282 | * When power savings policy is enabled for the parent domain, idle |
| @@ -4694,7 +4673,7 @@ int select_nohz_load_balancer(int stop_tick) | |||
| 4694 | cpumask_set_cpu(cpu, nohz.cpu_mask); | 4673 | cpumask_set_cpu(cpu, nohz.cpu_mask); |
| 4695 | 4674 | ||
| 4696 | /* time for ilb owner also to sleep */ | 4675 | /* time for ilb owner also to sleep */ |
| 4697 | if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { | 4676 | if (cpumask_weight(nohz.cpu_mask) == num_active_cpus()) { |
| 4698 | if (atomic_read(&nohz.load_balancer) == cpu) | 4677 | if (atomic_read(&nohz.load_balancer) == cpu) |
| 4699 | atomic_set(&nohz.load_balancer, -1); | 4678 | atomic_set(&nohz.load_balancer, -1); |
| 4700 | return 0; | 4679 | return 0; |
| @@ -5396,13 +5375,14 @@ static inline void schedule_debug(struct task_struct *prev) | |||
| 5396 | #endif | 5375 | #endif |
| 5397 | } | 5376 | } |
| 5398 | 5377 | ||
| 5399 | static void put_prev_task(struct rq *rq, struct task_struct *p) | 5378 | static void put_prev_task(struct rq *rq, struct task_struct *prev) |
| 5400 | { | 5379 | { |
| 5401 | u64 runtime = p->se.sum_exec_runtime - p->se.prev_sum_exec_runtime; | 5380 | if (prev->state == TASK_RUNNING) { |
| 5381 | u64 runtime = prev->se.sum_exec_runtime; | ||
| 5402 | 5382 | ||
| 5403 | update_avg(&p->se.avg_running, runtime); | 5383 | runtime -= prev->se.prev_sum_exec_runtime; |
| 5384 | runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost); | ||
| 5404 | 5385 | ||
| 5405 | if (p->state == TASK_RUNNING) { | ||
| 5406 | /* | 5386 | /* |
| 5407 | * In order to avoid avg_overlap growing stale when we are | 5387 | * In order to avoid avg_overlap growing stale when we are |
| 5408 | * indeed overlapping and hence not getting put to sleep, grow | 5388 | * indeed overlapping and hence not getting put to sleep, grow |
| @@ -5412,12 +5392,9 @@ static void put_prev_task(struct rq *rq, struct task_struct *p) | |||
| 5412 | * correlates to the amount of cache footprint a task can | 5392 | * correlates to the amount of cache footprint a task can |
| 5413 | * build up. | 5393 | * build up. |
| 5414 | */ | 5394 | */ |
| 5415 | runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost); | 5395 | update_avg(&prev->se.avg_overlap, runtime); |
| 5416 | update_avg(&p->se.avg_overlap, runtime); | ||
| 5417 | } else { | ||
| 5418 | update_avg(&p->se.avg_running, 0); | ||
| 5419 | } | 5396 | } |
| 5420 | p->sched_class->put_prev_task(rq, p); | 5397 | prev->sched_class->put_prev_task(rq, prev); |
| 5421 | } | 5398 | } |
| 5422 | 5399 | ||
| 5423 | /* | 5400 | /* |
| @@ -6631,6 +6608,8 @@ SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, | |||
| 6631 | long sched_getaffinity(pid_t pid, struct cpumask *mask) | 6608 | long sched_getaffinity(pid_t pid, struct cpumask *mask) |
| 6632 | { | 6609 | { |
| 6633 | struct task_struct *p; | 6610 | struct task_struct *p; |
| 6611 | unsigned long flags; | ||
| 6612 | struct rq *rq; | ||
| 6634 | int retval; | 6613 | int retval; |
| 6635 | 6614 | ||
| 6636 | get_online_cpus(); | 6615 | get_online_cpus(); |
| @@ -6645,7 +6624,9 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) | |||
| 6645 | if (retval) | 6624 | if (retval) |
| 6646 | goto out_unlock; | 6625 | goto out_unlock; |
| 6647 | 6626 | ||
| 6627 | rq = task_rq_lock(p, &flags); | ||
| 6648 | cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); | 6628 | cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); |
| 6629 | task_rq_unlock(rq, &flags); | ||
| 6649 | 6630 | ||
| 6650 | out_unlock: | 6631 | out_unlock: |
| 6651 | read_unlock(&tasklist_lock); | 6632 | read_unlock(&tasklist_lock); |
| @@ -6883,6 +6864,8 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, | |||
| 6883 | { | 6864 | { |
| 6884 | struct task_struct *p; | 6865 | struct task_struct *p; |
| 6885 | unsigned int time_slice; | 6866 | unsigned int time_slice; |
| 6867 | unsigned long flags; | ||
| 6868 | struct rq *rq; | ||
| 6886 | int retval; | 6869 | int retval; |
| 6887 | struct timespec t; | 6870 | struct timespec t; |
| 6888 | 6871 | ||
| @@ -6899,7 +6882,9 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, | |||
| 6899 | if (retval) | 6882 | if (retval) |
| 6900 | goto out_unlock; | 6883 | goto out_unlock; |
| 6901 | 6884 | ||
| 6902 | time_slice = p->sched_class->get_rr_interval(p); | 6885 | rq = task_rq_lock(p, &flags); |
| 6886 | time_slice = p->sched_class->get_rr_interval(rq, p); | ||
| 6887 | task_rq_unlock(rq, &flags); | ||
| 6903 | 6888 | ||
| 6904 | read_unlock(&tasklist_lock); | 6889 | read_unlock(&tasklist_lock); |
| 6905 | jiffies_to_timespec(time_slice, &t); | 6890 | jiffies_to_timespec(time_slice, &t); |
| @@ -7000,7 +6985,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
| 7000 | __sched_fork(idle); | 6985 | __sched_fork(idle); |
| 7001 | idle->se.exec_start = sched_clock(); | 6986 | idle->se.exec_start = sched_clock(); |
| 7002 | 6987 | ||
| 7003 | idle->prio = idle->normal_prio = MAX_PRIO; | ||
| 7004 | cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); | 6988 | cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); |
| 7005 | __set_task_cpu(idle, cpu); | 6989 | __set_task_cpu(idle, cpu); |
| 7006 | 6990 | ||
| @@ -7041,22 +7025,43 @@ cpumask_var_t nohz_cpu_mask; | |||
| 7041 | * | 7025 | * |
| 7042 | * This idea comes from the SD scheduler of Con Kolivas: | 7026 | * This idea comes from the SD scheduler of Con Kolivas: |
| 7043 | */ | 7027 | */ |
| 7044 | static inline void sched_init_granularity(void) | 7028 | static int get_update_sysctl_factor(void) |
| 7045 | { | 7029 | { |
| 7046 | unsigned int factor = 1 + ilog2(num_online_cpus()); | 7030 | unsigned int cpus = min_t(int, num_online_cpus(), 8); |
| 7047 | const unsigned long limit = 200000000; | 7031 | unsigned int factor; |
| 7032 | |||
| 7033 | switch (sysctl_sched_tunable_scaling) { | ||
| 7034 | case SCHED_TUNABLESCALING_NONE: | ||
| 7035 | factor = 1; | ||
| 7036 | break; | ||
| 7037 | case SCHED_TUNABLESCALING_LINEAR: | ||
| 7038 | factor = cpus; | ||
| 7039 | break; | ||
| 7040 | case SCHED_TUNABLESCALING_LOG: | ||
| 7041 | default: | ||
| 7042 | factor = 1 + ilog2(cpus); | ||
| 7043 | break; | ||
| 7044 | } | ||
| 7048 | 7045 | ||
| 7049 | sysctl_sched_min_granularity *= factor; | 7046 | return factor; |
| 7050 | if (sysctl_sched_min_granularity > limit) | 7047 | } |
| 7051 | sysctl_sched_min_granularity = limit; | ||
| 7052 | 7048 | ||
| 7053 | sysctl_sched_latency *= factor; | 7049 | static void update_sysctl(void) |
| 7054 | if (sysctl_sched_latency > limit) | 7050 | { |
| 7055 | sysctl_sched_latency = limit; | 7051 | unsigned int factor = get_update_sysctl_factor(); |
| 7056 | 7052 | ||
| 7057 | sysctl_sched_wakeup_granularity *= factor; | 7053 | #define SET_SYSCTL(name) \ |
| 7054 | (sysctl_##name = (factor) * normalized_sysctl_##name) | ||
| 7055 | SET_SYSCTL(sched_min_granularity); | ||
| 7056 | SET_SYSCTL(sched_latency); | ||
| 7057 | SET_SYSCTL(sched_wakeup_granularity); | ||
| 7058 | SET_SYSCTL(sched_shares_ratelimit); | ||
| 7059 | #undef SET_SYSCTL | ||
| 7060 | } | ||
| 7058 | 7061 | ||
| 7059 | sysctl_sched_shares_ratelimit *= factor; | 7062 | static inline void sched_init_granularity(void) |
| 7063 | { | ||
| 7064 | update_sysctl(); | ||
| 7060 | } | 7065 | } |
| 7061 | 7066 | ||
| 7062 | #ifdef CONFIG_SMP | 7067 | #ifdef CONFIG_SMP |
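A worked example of what the new get_update_sysctl_factor()/update_sysctl() pair computes, assuming the default SCHED_TUNABLESCALING_LOG and the normalized defaults added in sched_fair.c below. The helper function is illustrative only and assumes normalized_sysctl_sched_latency is visible at this point.

#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/log2.h>

static unsigned int scaled_latency_example(void)
{
        unsigned int cpus   = min_t(unsigned int, num_online_cpus(), 8); /* e.g. 8 on an 8-way box */
        unsigned int factor = 1 + ilog2(cpus);                           /* 1 + 3 = 4 */

        /* SET_SYSCTL(sched_latency): 4 * 5000000 ns = 20 ms; systems with
         * more than 8 online CPUs are capped at the same factor of 4 */
        return factor * normalized_sysctl_sched_latency;
}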
| @@ -7093,7 +7098,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) | |||
| 7093 | int ret = 0; | 7098 | int ret = 0; |
| 7094 | 7099 | ||
| 7095 | rq = task_rq_lock(p, &flags); | 7100 | rq = task_rq_lock(p, &flags); |
| 7096 | if (!cpumask_intersects(new_mask, cpu_online_mask)) { | 7101 | if (!cpumask_intersects(new_mask, cpu_active_mask)) { |
| 7097 | ret = -EINVAL; | 7102 | ret = -EINVAL; |
| 7098 | goto out; | 7103 | goto out; |
| 7099 | } | 7104 | } |
| @@ -7115,7 +7120,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) | |||
| 7115 | if (cpumask_test_cpu(task_cpu(p), new_mask)) | 7120 | if (cpumask_test_cpu(task_cpu(p), new_mask)) |
| 7116 | goto out; | 7121 | goto out; |
| 7117 | 7122 | ||
| 7118 | if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) { | 7123 | if (migrate_task(p, cpumask_any_and(cpu_active_mask, new_mask), &req)) { |
| 7119 | /* Need help from migration thread: drop lock and wait. */ | 7124 | /* Need help from migration thread: drop lock and wait. */ |
| 7120 | struct task_struct *mt = rq->migration_thread; | 7125 | struct task_struct *mt = rq->migration_thread; |
| 7121 | 7126 | ||
| @@ -7269,19 +7274,19 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) | |||
| 7269 | 7274 | ||
| 7270 | again: | 7275 | again: |
| 7271 | /* Look for allowed, online CPU in same node. */ | 7276 | /* Look for allowed, online CPU in same node. */ |
| 7272 | for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask) | 7277 | for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) |
| 7273 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) | 7278 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) |
| 7274 | goto move; | 7279 | goto move; |
| 7275 | 7280 | ||
| 7276 | /* Any allowed, online CPU? */ | 7281 | /* Any allowed, online CPU? */ |
| 7277 | dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask); | 7282 | dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask); |
| 7278 | if (dest_cpu < nr_cpu_ids) | 7283 | if (dest_cpu < nr_cpu_ids) |
| 7279 | goto move; | 7284 | goto move; |
| 7280 | 7285 | ||
| 7281 | /* No more Mr. Nice Guy. */ | 7286 | /* No more Mr. Nice Guy. */ |
| 7282 | if (dest_cpu >= nr_cpu_ids) { | 7287 | if (dest_cpu >= nr_cpu_ids) { |
| 7283 | cpuset_cpus_allowed_locked(p, &p->cpus_allowed); | 7288 | cpuset_cpus_allowed_locked(p, &p->cpus_allowed); |
| 7284 | dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed); | 7289 | dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed); |
| 7285 | 7290 | ||
| 7286 | /* | 7291 | /* |
| 7287 | * Don't tell them about moving exiting tasks or | 7292 | * Don't tell them about moving exiting tasks or |
| @@ -7310,7 +7315,7 @@ move: | |||
| 7310 | */ | 7315 | */ |
| 7311 | static void migrate_nr_uninterruptible(struct rq *rq_src) | 7316 | static void migrate_nr_uninterruptible(struct rq *rq_src) |
| 7312 | { | 7317 | { |
| 7313 | struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask)); | 7318 | struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask)); |
| 7314 | unsigned long flags; | 7319 | unsigned long flags; |
| 7315 | 7320 | ||
| 7316 | local_irq_save(flags); | 7321 | local_irq_save(flags); |
| @@ -7563,7 +7568,7 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu) | |||
| 7563 | static struct ctl_table_header *sd_sysctl_header; | 7568 | static struct ctl_table_header *sd_sysctl_header; |
| 7564 | static void register_sched_domain_sysctl(void) | 7569 | static void register_sched_domain_sysctl(void) |
| 7565 | { | 7570 | { |
| 7566 | int i, cpu_num = num_online_cpus(); | 7571 | int i, cpu_num = num_possible_cpus(); |
| 7567 | struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1); | 7572 | struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1); |
| 7568 | char buf[32]; | 7573 | char buf[32]; |
| 7569 | 7574 | ||
| @@ -7573,7 +7578,7 @@ static void register_sched_domain_sysctl(void) | |||
| 7573 | if (entry == NULL) | 7578 | if (entry == NULL) |
| 7574 | return; | 7579 | return; |
| 7575 | 7580 | ||
| 7576 | for_each_online_cpu(i) { | 7581 | for_each_possible_cpu(i) { |
| 7577 | snprintf(buf, 32, "cpu%d", i); | 7582 | snprintf(buf, 32, "cpu%d", i); |
| 7578 | entry->procname = kstrdup(buf, GFP_KERNEL); | 7583 | entry->procname = kstrdup(buf, GFP_KERNEL); |
| 7579 | entry->mode = 0555; | 7584 | entry->mode = 0555; |
| @@ -7703,7 +7708,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
| 7703 | spin_lock_irq(&rq->lock); | 7708 | spin_lock_irq(&rq->lock); |
| 7704 | update_rq_clock(rq); | 7709 | update_rq_clock(rq); |
| 7705 | deactivate_task(rq, rq->idle, 0); | 7710 | deactivate_task(rq, rq->idle, 0); |
| 7706 | rq->idle->static_prio = MAX_PRIO; | ||
| 7707 | __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); | 7711 | __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); |
| 7708 | rq->idle->sched_class = &idle_sched_class; | 7712 | rq->idle->sched_class = &idle_sched_class; |
| 7709 | migrate_dead_tasks(cpu); | 7713 | migrate_dead_tasks(cpu); |
| @@ -8282,14 +8286,14 @@ enum s_alloc { | |||
| 8282 | */ | 8286 | */ |
| 8283 | #ifdef CONFIG_SCHED_SMT | 8287 | #ifdef CONFIG_SCHED_SMT |
| 8284 | static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains); | 8288 | static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains); |
| 8285 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_cpus); | 8289 | static DEFINE_PER_CPU(struct static_sched_group, sched_groups); |
| 8286 | 8290 | ||
| 8287 | static int | 8291 | static int |
| 8288 | cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map, | 8292 | cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map, |
| 8289 | struct sched_group **sg, struct cpumask *unused) | 8293 | struct sched_group **sg, struct cpumask *unused) |
| 8290 | { | 8294 | { |
| 8291 | if (sg) | 8295 | if (sg) |
| 8292 | *sg = &per_cpu(sched_group_cpus, cpu).sg; | 8296 | *sg = &per_cpu(sched_groups, cpu).sg; |
| 8293 | return cpu; | 8297 | return cpu; |
| 8294 | } | 8298 | } |
| 8295 | #endif /* CONFIG_SCHED_SMT */ | 8299 | #endif /* CONFIG_SCHED_SMT */ |
| @@ -9099,7 +9103,7 @@ match1: | |||
| 9099 | if (doms_new == NULL) { | 9103 | if (doms_new == NULL) { |
| 9100 | ndoms_cur = 0; | 9104 | ndoms_cur = 0; |
| 9101 | doms_new = &fallback_doms; | 9105 | doms_new = &fallback_doms; |
| 9102 | cpumask_andnot(doms_new[0], cpu_online_mask, cpu_isolated_map); | 9106 | cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map); |
| 9103 | WARN_ON_ONCE(dattr_new); | 9107 | WARN_ON_ONCE(dattr_new); |
| 9104 | } | 9108 | } |
| 9105 | 9109 | ||
| @@ -9230,8 +9234,10 @@ static int update_sched_domains(struct notifier_block *nfb, | |||
| 9230 | switch (action) { | 9234 | switch (action) { |
| 9231 | case CPU_ONLINE: | 9235 | case CPU_ONLINE: |
| 9232 | case CPU_ONLINE_FROZEN: | 9236 | case CPU_ONLINE_FROZEN: |
| 9233 | case CPU_DEAD: | 9237 | case CPU_DOWN_PREPARE: |
| 9234 | case CPU_DEAD_FROZEN: | 9238 | case CPU_DOWN_PREPARE_FROZEN: |
| 9239 | case CPU_DOWN_FAILED: | ||
| 9240 | case CPU_DOWN_FAILED_FROZEN: | ||
| 9235 | partition_sched_domains(1, NULL, NULL); | 9241 | partition_sched_domains(1, NULL, NULL); |
| 9236 | return NOTIFY_OK; | 9242 | return NOTIFY_OK; |
| 9237 | 9243 | ||
| @@ -9278,7 +9284,7 @@ void __init sched_init_smp(void) | |||
| 9278 | #endif | 9284 | #endif |
| 9279 | get_online_cpus(); | 9285 | get_online_cpus(); |
| 9280 | mutex_lock(&sched_domains_mutex); | 9286 | mutex_lock(&sched_domains_mutex); |
| 9281 | arch_init_sched_domains(cpu_online_mask); | 9287 | arch_init_sched_domains(cpu_active_mask); |
| 9282 | cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); | 9288 | cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); |
| 9283 | if (cpumask_empty(non_isolated_cpus)) | 9289 | if (cpumask_empty(non_isolated_cpus)) |
| 9284 | cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); | 9290 | cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); |
| @@ -9577,7 +9583,7 @@ void __init sched_init(void) | |||
| 9577 | #elif defined CONFIG_USER_SCHED | 9583 | #elif defined CONFIG_USER_SCHED |
| 9578 | init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, 0, NULL); | 9584 | init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, 0, NULL); |
| 9579 | init_tg_rt_entry(&init_task_group, | 9585 | init_tg_rt_entry(&init_task_group, |
| 9580 | &per_cpu(init_rt_rq, i), | 9586 | &per_cpu(init_rt_rq_var, i), |
| 9581 | &per_cpu(init_sched_rt_entity, i), i, 1, | 9587 | &per_cpu(init_sched_rt_entity, i), i, 1, |
| 9582 | root_task_group.rt_se[i]); | 9588 | root_task_group.rt_se[i]); |
| 9583 | #endif | 9589 | #endif |
| @@ -9842,13 +9848,15 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | |||
| 9842 | se = kzalloc_node(sizeof(struct sched_entity), | 9848 | se = kzalloc_node(sizeof(struct sched_entity), |
| 9843 | GFP_KERNEL, cpu_to_node(i)); | 9849 | GFP_KERNEL, cpu_to_node(i)); |
| 9844 | if (!se) | 9850 | if (!se) |
| 9845 | goto err; | 9851 | goto err_free_rq; |
| 9846 | 9852 | ||
| 9847 | init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]); | 9853 | init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]); |
| 9848 | } | 9854 | } |
| 9849 | 9855 | ||
| 9850 | return 1; | 9856 | return 1; |
| 9851 | 9857 | ||
| 9858 | err_free_rq: | ||
| 9859 | kfree(cfs_rq); | ||
| 9852 | err: | 9860 | err: |
| 9853 | return 0; | 9861 | return 0; |
| 9854 | } | 9862 | } |
| @@ -9930,13 +9938,15 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) | |||
| 9930 | rt_se = kzalloc_node(sizeof(struct sched_rt_entity), | 9938 | rt_se = kzalloc_node(sizeof(struct sched_rt_entity), |
| 9931 | GFP_KERNEL, cpu_to_node(i)); | 9939 | GFP_KERNEL, cpu_to_node(i)); |
| 9932 | if (!rt_se) | 9940 | if (!rt_se) |
| 9933 | goto err; | 9941 | goto err_free_rq; |
| 9934 | 9942 | ||
| 9935 | init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]); | 9943 | init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]); |
| 9936 | } | 9944 | } |
| 9937 | 9945 | ||
| 9938 | return 1; | 9946 | return 1; |
| 9939 | 9947 | ||
| 9948 | err_free_rq: | ||
| 9949 | kfree(rt_rq); | ||
| 9940 | err: | 9950 | err: |
| 9941 | return 0; | 9951 | return 0; |
| 9942 | } | 9952 | } |
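The new err_free_rq labels follow the standard kernel unwind pattern: each allocation gets a matching cleanup label so a later failure frees exactly what was already taken, and the functions keep returning 0 on failure and 1 on success as before. A minimal standalone sketch; struct pair and the sizes are hypothetical.

#include <linux/slab.h>

struct pair { void *a; void *b; };

static int alloc_pair(struct pair *p)
{
        p->a = kzalloc(64, GFP_KERNEL);
        if (!p->a)
                goto err;                       /* nothing to undo yet */

        p->b = kzalloc(64, GFP_KERNEL);
        if (!p->b)
                goto err_free_a;                /* undo only the first allocation */

        return 1;

err_free_a:
        kfree(p->a);
err:
        return 0;
}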
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 6988cf08f705..5ae24fc65d75 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c | |||
| @@ -309,6 +309,12 @@ static void print_cpu(struct seq_file *m, int cpu) | |||
| 309 | print_rq(m, rq, cpu); | 309 | print_rq(m, rq, cpu); |
| 310 | } | 310 | } |
| 311 | 311 | ||
| 312 | static const char *sched_tunable_scaling_names[] = { | ||
| 313 | "none", | ||
| 314 | "logaritmic", | ||
| 315 | "linear" | ||
| 316 | }; | ||
| 317 | |||
| 312 | static int sched_debug_show(struct seq_file *m, void *v) | 318 | static int sched_debug_show(struct seq_file *m, void *v) |
| 313 | { | 319 | { |
| 314 | u64 now = ktime_to_ns(ktime_get()); | 320 | u64 now = ktime_to_ns(ktime_get()); |
| @@ -334,6 +340,10 @@ static int sched_debug_show(struct seq_file *m, void *v) | |||
| 334 | #undef PN | 340 | #undef PN |
| 335 | #undef P | 341 | #undef P |
| 336 | 342 | ||
| 343 | SEQ_printf(m, " .%-40s: %d (%s)\n", "sysctl_sched_tunable_scaling", | ||
| 344 | sysctl_sched_tunable_scaling, | ||
| 345 | sched_tunable_scaling_names[sysctl_sched_tunable_scaling]); | ||
| 346 | |||
| 337 | for_each_online_cpu(cpu) | 347 | for_each_online_cpu(cpu) |
| 338 | print_cpu(m, cpu); | 348 | print_cpu(m, cpu); |
| 339 | 349 | ||
| @@ -399,7 +409,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) | |||
| 399 | PN(se.sum_exec_runtime); | 409 | PN(se.sum_exec_runtime); |
| 400 | PN(se.avg_overlap); | 410 | PN(se.avg_overlap); |
| 401 | PN(se.avg_wakeup); | 411 | PN(se.avg_wakeup); |
| 402 | PN(se.avg_running); | ||
| 403 | 412 | ||
| 404 | nr_switches = p->nvcsw + p->nivcsw; | 413 | nr_switches = p->nvcsw + p->nivcsw; |
| 405 | 414 | ||
| @@ -423,7 +432,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) | |||
| 423 | P(se.nr_failed_migrations_running); | 432 | P(se.nr_failed_migrations_running); |
| 424 | P(se.nr_failed_migrations_hot); | 433 | P(se.nr_failed_migrations_hot); |
| 425 | P(se.nr_forced_migrations); | 434 | P(se.nr_forced_migrations); |
| 426 | P(se.nr_forced2_migrations); | ||
| 427 | P(se.nr_wakeups); | 435 | P(se.nr_wakeups); |
| 428 | P(se.nr_wakeups_sync); | 436 | P(se.nr_wakeups_sync); |
| 429 | P(se.nr_wakeups_migrate); | 437 | P(se.nr_wakeups_migrate); |
| @@ -499,7 +507,6 @@ void proc_sched_set_task(struct task_struct *p) | |||
| 499 | p->se.nr_failed_migrations_running = 0; | 507 | p->se.nr_failed_migrations_running = 0; |
| 500 | p->se.nr_failed_migrations_hot = 0; | 508 | p->se.nr_failed_migrations_hot = 0; |
| 501 | p->se.nr_forced_migrations = 0; | 509 | p->se.nr_forced_migrations = 0; |
| 502 | p->se.nr_forced2_migrations = 0; | ||
| 503 | p->se.nr_wakeups = 0; | 510 | p->se.nr_wakeups = 0; |
| 504 | p->se.nr_wakeups_sync = 0; | 511 | p->se.nr_wakeups_sync = 0; |
| 505 | p->se.nr_wakeups_migrate = 0; | 512 | p->se.nr_wakeups_migrate = 0; |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index f61837ad336d..804a411838f1 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | */ | 21 | */ |
| 22 | 22 | ||
| 23 | #include <linux/latencytop.h> | 23 | #include <linux/latencytop.h> |
| 24 | #include <linux/sched.h> | ||
| 24 | 25 | ||
| 25 | /* | 26 | /* |
| 26 | * Targeted preemption latency for CPU-bound tasks: | 27 | * Targeted preemption latency for CPU-bound tasks: |
| @@ -35,12 +36,26 @@ | |||
| 35 | * run vmstat and monitor the context-switches (cs) field) | 36 | * run vmstat and monitor the context-switches (cs) field) |
| 36 | */ | 37 | */ |
| 37 | unsigned int sysctl_sched_latency = 5000000ULL; | 38 | unsigned int sysctl_sched_latency = 5000000ULL; |
| 39 | unsigned int normalized_sysctl_sched_latency = 5000000ULL; | ||
| 40 | |||
| 41 | /* | ||
| 42 | * The initial- and re-scaling of tunables is configurable | ||
| 43 | * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))) | ||
| 44 | * | ||
| 45 | * Options are: | ||
| 46 | * SCHED_TUNABLESCALING_NONE - unscaled, always *1 | ||
| 47 | * SCHED_TUNABLESCALING_LOG - scaled logarithmically, *1+ilog(ncpus) | ||
| 48 | * SCHED_TUNABLESCALING_LINEAR - scaled linearly, *ncpus | ||
| 49 | */ | ||
| 50 | enum sched_tunable_scaling sysctl_sched_tunable_scaling | ||
| 51 | = SCHED_TUNABLESCALING_LOG; | ||
| 38 | 52 | ||
| 39 | /* | 53 | /* |
| 40 | * Minimal preemption granularity for CPU-bound tasks: | 54 | * Minimal preemption granularity for CPU-bound tasks: |
| 41 | * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) | 55 | * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) |
| 42 | */ | 56 | */ |
| 43 | unsigned int sysctl_sched_min_granularity = 1000000ULL; | 57 | unsigned int sysctl_sched_min_granularity = 1000000ULL; |
| 58 | unsigned int normalized_sysctl_sched_min_granularity = 1000000ULL; | ||
| 44 | 59 | ||
| 45 | /* | 60 | /* |
| 46 | * is kept at sysctl_sched_latency / sysctl_sched_min_granularity | 61 | * is kept at sysctl_sched_latency / sysctl_sched_min_granularity |
| @@ -70,6 +85,7 @@ unsigned int __read_mostly sysctl_sched_compat_yield; | |||
| 70 | * have immediate wakeup/sleep latencies. | 85 | * have immediate wakeup/sleep latencies. |
| 71 | */ | 86 | */ |
| 72 | unsigned int sysctl_sched_wakeup_granularity = 1000000UL; | 87 | unsigned int sysctl_sched_wakeup_granularity = 1000000UL; |
| 88 | unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; | ||
| 73 | 89 | ||
| 74 | const_debug unsigned int sysctl_sched_migration_cost = 500000UL; | 90 | const_debug unsigned int sysctl_sched_migration_cost = 500000UL; |
| 75 | 91 | ||
| @@ -383,11 +399,12 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) | |||
| 383 | */ | 399 | */ |
| 384 | 400 | ||
| 385 | #ifdef CONFIG_SCHED_DEBUG | 401 | #ifdef CONFIG_SCHED_DEBUG |
| 386 | int sched_nr_latency_handler(struct ctl_table *table, int write, | 402 | int sched_proc_update_handler(struct ctl_table *table, int write, |
| 387 | void __user *buffer, size_t *lenp, | 403 | void __user *buffer, size_t *lenp, |
| 388 | loff_t *ppos) | 404 | loff_t *ppos) |
| 389 | { | 405 | { |
| 390 | int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); | 406 | int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
| 407 | int factor = get_update_sysctl_factor(); | ||
| 391 | 408 | ||
| 392 | if (ret || !write) | 409 | if (ret || !write) |
| 393 | return ret; | 410 | return ret; |
| @@ -395,6 +412,14 @@ int sched_nr_latency_handler(struct ctl_table *table, int write, | |||
| 395 | sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency, | 412 | sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency, |
| 396 | sysctl_sched_min_granularity); | 413 | sysctl_sched_min_granularity); |
| 397 | 414 | ||
| 415 | #define WRT_SYSCTL(name) \ | ||
| 416 | (normalized_sysctl_##name = sysctl_##name / (factor)) | ||
| 417 | WRT_SYSCTL(sched_min_granularity); | ||
| 418 | WRT_SYSCTL(sched_latency); | ||
| 419 | WRT_SYSCTL(sched_wakeup_granularity); | ||
| 420 | WRT_SYSCTL(sched_shares_ratelimit); | ||
| 421 | #undef WRT_SYSCTL | ||
| 422 | |||
| 398 | return 0; | 423 | return 0; |
| 399 | } | 424 | } |
| 400 | #endif | 425 | #endif |
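On a write, the handler above also stores the value back in un-scaled form: WRT_SYSCTL divides by the current factor, so a later update_sysctl() (for example after CPU hotplug, via rq_online_fair()/rq_offline_fair() further down) rescales from the administrator's setting instead of compounding factors. Illustrative numbers only, using the factor of 4 from the earlier example and the sched_latency_ns sysctl name.

/*
 *   echo 40000000 > /proc/sys/kernel/sched_latency_ns
 *       sysctl_sched_latency            = 40 ms
 *       normalized_sysctl_sched_latency = 40 ms / 4  = 10 ms   (WRT_SYSCTL)
 *
 *   update_sysctl() later on a 1-CPU system (factor = 1):
 *       sysctl_sched_latency            = 1 * 10 ms  = 10 ms
 */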
| @@ -1403,7 +1428,6 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag | |||
| 1403 | new_cpu = prev_cpu; | 1428 | new_cpu = prev_cpu; |
| 1404 | } | 1429 | } |
| 1405 | 1430 | ||
| 1406 | rcu_read_lock(); | ||
| 1407 | for_each_domain(cpu, tmp) { | 1431 | for_each_domain(cpu, tmp) { |
| 1408 | /* | 1432 | /* |
| 1409 | * If power savings logic is enabled for a domain, see if we | 1433 | * If power savings logic is enabled for a domain, see if we |
| @@ -1484,10 +1508,8 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag | |||
| 1484 | update_shares(tmp); | 1508 | update_shares(tmp); |
| 1485 | } | 1509 | } |
| 1486 | 1510 | ||
| 1487 | if (affine_sd && wake_affine(affine_sd, p, sync)) { | 1511 | if (affine_sd && wake_affine(affine_sd, p, sync)) |
| 1488 | new_cpu = cpu; | 1512 | return cpu; |
| 1489 | goto out; | ||
| 1490 | } | ||
| 1491 | 1513 | ||
| 1492 | while (sd) { | 1514 | while (sd) { |
| 1493 | int load_idx = sd->forkexec_idx; | 1515 | int load_idx = sd->forkexec_idx; |
| @@ -1528,8 +1550,6 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag | |||
| 1528 | /* while loop will break here if sd == NULL */ | 1550 | /* while loop will break here if sd == NULL */ |
| 1529 | } | 1551 | } |
| 1530 | 1552 | ||
| 1531 | out: | ||
| 1532 | rcu_read_unlock(); | ||
| 1533 | return new_cpu; | 1553 | return new_cpu; |
| 1534 | } | 1554 | } |
| 1535 | #endif /* CONFIG_SMP */ | 1555 | #endif /* CONFIG_SMP */ |
| @@ -1651,12 +1671,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |||
| 1651 | int sync = wake_flags & WF_SYNC; | 1671 | int sync = wake_flags & WF_SYNC; |
| 1652 | int scale = cfs_rq->nr_running >= sched_nr_latency; | 1672 | int scale = cfs_rq->nr_running >= sched_nr_latency; |
| 1653 | 1673 | ||
| 1654 | update_curr(cfs_rq); | 1674 | if (unlikely(rt_prio(p->prio))) |
| 1655 | 1675 | goto preempt; | |
| 1656 | if (unlikely(rt_prio(p->prio))) { | ||
| 1657 | resched_task(curr); | ||
| 1658 | return; | ||
| 1659 | } | ||
| 1660 | 1676 | ||
| 1661 | if (unlikely(p->sched_class != &fair_sched_class)) | 1677 | if (unlikely(p->sched_class != &fair_sched_class)) |
| 1662 | return; | 1678 | return; |
| @@ -1682,50 +1698,44 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |||
| 1682 | return; | 1698 | return; |
| 1683 | 1699 | ||
| 1684 | /* Idle tasks are by definition preempted by everybody. */ | 1700 | /* Idle tasks are by definition preempted by everybody. */ |
| 1685 | if (unlikely(curr->policy == SCHED_IDLE)) { | 1701 | if (unlikely(curr->policy == SCHED_IDLE)) |
| 1686 | resched_task(curr); | 1702 | goto preempt; |
| 1687 | return; | ||
| 1688 | } | ||
| 1689 | 1703 | ||
| 1690 | if ((sched_feat(WAKEUP_SYNC) && sync) || | 1704 | if (sched_feat(WAKEUP_SYNC) && sync) |
| 1691 | (sched_feat(WAKEUP_OVERLAP) && | 1705 | goto preempt; |
| 1692 | (se->avg_overlap < sysctl_sched_migration_cost && | ||
| 1693 | pse->avg_overlap < sysctl_sched_migration_cost))) { | ||
| 1694 | resched_task(curr); | ||
| 1695 | return; | ||
| 1696 | } | ||
| 1697 | 1706 | ||
| 1698 | if (sched_feat(WAKEUP_RUNNING)) { | 1707 | if (sched_feat(WAKEUP_OVERLAP) && |
| 1699 | if (pse->avg_running < se->avg_running) { | 1708 | se->avg_overlap < sysctl_sched_migration_cost && |
| 1700 | set_next_buddy(pse); | 1709 | pse->avg_overlap < sysctl_sched_migration_cost) |
| 1701 | resched_task(curr); | 1710 | goto preempt; |
| 1702 | return; | ||
| 1703 | } | ||
| 1704 | } | ||
| 1705 | 1711 | ||
| 1706 | if (!sched_feat(WAKEUP_PREEMPT)) | 1712 | if (!sched_feat(WAKEUP_PREEMPT)) |
| 1707 | return; | 1713 | return; |
| 1708 | 1714 | ||
| 1715 | update_curr(cfs_rq); | ||
| 1709 | find_matching_se(&se, &pse); | 1716 | find_matching_se(&se, &pse); |
| 1710 | |||
| 1711 | BUG_ON(!pse); | 1717 | BUG_ON(!pse); |
| 1718 | if (wakeup_preempt_entity(se, pse) == 1) | ||
| 1719 | goto preempt; | ||
| 1712 | 1720 | ||
| 1713 | if (wakeup_preempt_entity(se, pse) == 1) { | 1721 | return; |
| 1714 | resched_task(curr); | 1722 | |
| 1715 | /* | 1723 | preempt: |
| 1716 | * Only set the backward buddy when the current task is still | 1724 | resched_task(curr); |
| 1717 | * on the rq. This can happen when a wakeup gets interleaved | 1725 | /* |
| 1718 | * with schedule on the ->pre_schedule() or idle_balance() | 1726 | * Only set the backward buddy when the current task is still |
| 1719 | * point, either of which can * drop the rq lock. | 1727 | * on the rq. This can happen when a wakeup gets interleaved |
| 1720 | * | 1728 | * with schedule on the ->pre_schedule() or idle_balance() |
| 1721 | * Also, during early boot the idle thread is in the fair class, | 1729 | * point, either of which can * drop the rq lock. |
| 1722 | * for obvious reasons its a bad idea to schedule back to it. | 1730 | * |
| 1723 | */ | 1731 | * Also, during early boot the idle thread is in the fair class, |
| 1724 | if (unlikely(!se->on_rq || curr == rq->idle)) | 1732 | * for obvious reasons its a bad idea to schedule back to it. |
| 1725 | return; | 1733 | */ |
| 1726 | if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se)) | 1734 | if (unlikely(!se->on_rq || curr == rq->idle)) |
| 1727 | set_last_buddy(se); | 1735 | return; |
| 1728 | } | 1736 | |
| 1737 | if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se)) | ||
| 1738 | set_last_buddy(se); | ||
| 1729 | } | 1739 | } |
| 1730 | 1740 | ||
| 1731 | static struct task_struct *pick_next_task_fair(struct rq *rq) | 1741 | static struct task_struct *pick_next_task_fair(struct rq *rq) |
| @@ -1905,6 +1915,17 @@ move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
| 1905 | 1915 | ||
| 1906 | return 0; | 1916 | return 0; |
| 1907 | } | 1917 | } |
| 1918 | |||
| 1919 | static void rq_online_fair(struct rq *rq) | ||
| 1920 | { | ||
| 1921 | update_sysctl(); | ||
| 1922 | } | ||
| 1923 | |||
| 1924 | static void rq_offline_fair(struct rq *rq) | ||
| 1925 | { | ||
| 1926 | update_sysctl(); | ||
| 1927 | } | ||
| 1928 | |||
| 1908 | #endif /* CONFIG_SMP */ | 1929 | #endif /* CONFIG_SMP */ |
| 1909 | 1930 | ||
| 1910 | /* | 1931 | /* |
| @@ -1922,28 +1943,30 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) | |||
| 1922 | } | 1943 | } |
| 1923 | 1944 | ||
| 1924 | /* | 1945 | /* |
| 1925 | * Share the fairness runtime between parent and child, thus the | 1946 | * called on fork with the child task as argument from the parent's context |
| 1926 | * total amount of pressure for CPU stays equal - new tasks | 1947 | * - child not yet on the tasklist |
| 1927 | * get a chance to run but frequent forkers are not allowed to | 1948 | * - preemption disabled |
| 1928 | * monopolize the CPU. Note: the parent runqueue is locked, | ||
| 1929 | * the child is not running yet. | ||
| 1930 | */ | 1949 | */ |
| 1931 | static void task_new_fair(struct rq *rq, struct task_struct *p) | 1950 | static void task_fork_fair(struct task_struct *p) |
| 1932 | { | 1951 | { |
| 1933 | struct cfs_rq *cfs_rq = task_cfs_rq(p); | 1952 | struct cfs_rq *cfs_rq = task_cfs_rq(current); |
| 1934 | struct sched_entity *se = &p->se, *curr = cfs_rq->curr; | 1953 | struct sched_entity *se = &p->se, *curr = cfs_rq->curr; |
| 1935 | int this_cpu = smp_processor_id(); | 1954 | int this_cpu = smp_processor_id(); |
| 1955 | struct rq *rq = this_rq(); | ||
| 1956 | unsigned long flags; | ||
| 1957 | |||
| 1958 | spin_lock_irqsave(&rq->lock, flags); | ||
| 1936 | 1959 | ||
| 1937 | sched_info_queued(p); | 1960 | if (unlikely(task_cpu(p) != this_cpu)) |
| 1961 | __set_task_cpu(p, this_cpu); | ||
| 1938 | 1962 | ||
| 1939 | update_curr(cfs_rq); | 1963 | update_curr(cfs_rq); |
| 1964 | |||
| 1940 | if (curr) | 1965 | if (curr) |
| 1941 | se->vruntime = curr->vruntime; | 1966 | se->vruntime = curr->vruntime; |
| 1942 | place_entity(cfs_rq, se, 1); | 1967 | place_entity(cfs_rq, se, 1); |
| 1943 | 1968 | ||
| 1944 | /* 'curr' will be NULL if the child belongs to a different group */ | 1969 | if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) { |
| 1945 | if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) && | ||
| 1946 | curr && entity_before(curr, se)) { | ||
| 1947 | /* | 1970 | /* |
| 1948 | * Upon rescheduling, sched_class::put_prev_task() will place | 1971 | * Upon rescheduling, sched_class::put_prev_task() will place |
| 1949 | * 'current' within the tree based on its new key value. | 1972 | * 'current' within the tree based on its new key value. |
| @@ -1952,7 +1975,7 @@ static void task_new_fair(struct rq *rq, struct task_struct *p) | |||
| 1952 | resched_task(rq->curr); | 1975 | resched_task(rq->curr); |
| 1953 | } | 1976 | } |
| 1954 | 1977 | ||
| 1955 | enqueue_task_fair(rq, p, 0); | 1978 | spin_unlock_irqrestore(&rq->lock, flags); |
| 1956 | } | 1979 | } |
| 1957 | 1980 | ||
| 1958 | /* | 1981 | /* |
| @@ -2014,21 +2037,17 @@ static void moved_group_fair(struct task_struct *p) | |||
| 2014 | } | 2037 | } |
| 2015 | #endif | 2038 | #endif |
| 2016 | 2039 | ||
| 2017 | unsigned int get_rr_interval_fair(struct task_struct *task) | 2040 | unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task) |
| 2018 | { | 2041 | { |
| 2019 | struct sched_entity *se = &task->se; | 2042 | struct sched_entity *se = &task->se; |
| 2020 | unsigned long flags; | ||
| 2021 | struct rq *rq; | ||
| 2022 | unsigned int rr_interval = 0; | 2043 | unsigned int rr_interval = 0; |
| 2023 | 2044 | ||
| 2024 | /* | 2045 | /* |
| 2025 | * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise | 2046 | * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise |
| 2026 | * idle runqueue: | 2047 | * idle runqueue: |
| 2027 | */ | 2048 | */ |
| 2028 | rq = task_rq_lock(task, &flags); | ||
| 2029 | if (rq->cfs.load.weight) | 2049 | if (rq->cfs.load.weight) |
| 2030 | rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se)); | 2050 | rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se)); |
| 2031 | task_rq_unlock(rq, &flags); | ||
| 2032 | 2051 | ||
| 2033 | return rr_interval; | 2052 | return rr_interval; |
| 2034 | } | 2053 | } |
| @@ -2052,11 +2071,13 @@ static const struct sched_class fair_sched_class = { | |||
| 2052 | 2071 | ||
| 2053 | .load_balance = load_balance_fair, | 2072 | .load_balance = load_balance_fair, |
| 2054 | .move_one_task = move_one_task_fair, | 2073 | .move_one_task = move_one_task_fair, |
| 2074 | .rq_online = rq_online_fair, | ||
| 2075 | .rq_offline = rq_offline_fair, | ||
| 2055 | #endif | 2076 | #endif |
| 2056 | 2077 | ||
| 2057 | .set_curr_task = set_curr_task_fair, | 2078 | .set_curr_task = set_curr_task_fair, |
| 2058 | .task_tick = task_tick_fair, | 2079 | .task_tick = task_tick_fair, |
| 2059 | .task_new = task_new_fair, | 2080 | .task_fork = task_fork_fair, |
| 2060 | 2081 | ||
| 2061 | .prio_changed = prio_changed_fair, | 2082 | .prio_changed = prio_changed_fair, |
| 2062 | .switched_to = switched_to_fair, | 2083 | .switched_to = switched_to_fair, |
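The check_preempt_wakeup() hunks above replace several repeated "resched_task(curr); return;" branches with a single preempt: label that carries the common tail. A minimal userspace sketch of that control-flow shape, with invented names (should_preempt, do_preempt) standing in for the scheduler code:

#include <stdbool.h>
#include <stdio.h>

/*
 * Standalone sketch of the "goto preempt" cleanup: every condition that
 * ends in the same action jumps to one label instead of repeating the
 * action and the early return. Names here are illustrative only.
 */
static void do_preempt(void)
{
	printf("resched current task\n");
}

static void should_preempt(bool rt_waker, bool idle_curr, bool sync_hint)
{
	if (rt_waker)
		goto preempt;	/* RT wakers always win over fair tasks */
	if (idle_curr)
		goto preempt;	/* SCHED_IDLE is preempted by everybody */
	if (sync_hint)
		goto preempt;	/* sync wakeups hint at an immediate handoff */

	/* more expensive checks would run here, exactly once */
	return;

preempt:
	do_preempt();		/* the common tail lives in one place */
}

int main(void)
{
	should_preempt(false, true, false);
	return 0;
}

The payoff is the same as in the patch: the tail that every preemption condition shares (here a printf(), there the resched plus buddy handling) is written once.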
diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 0d94083582c7..d5059fd761d9 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h | |||
| @@ -54,11 +54,6 @@ SCHED_FEAT(WAKEUP_SYNC, 0) | |||
| 54 | SCHED_FEAT(WAKEUP_OVERLAP, 0) | 54 | SCHED_FEAT(WAKEUP_OVERLAP, 0) |
| 55 | 55 | ||
| 56 | /* | 56 | /* |
| 57 | * Wakeup preemption towards tasks that run short | ||
| 58 | */ | ||
| 59 | SCHED_FEAT(WAKEUP_RUNNING, 0) | ||
| 60 | |||
| 61 | /* | ||
| 62 | * Use the SYNC wakeup hint, pipes and the likes use this to indicate | 57 | * Use the SYNC wakeup hint, pipes and the likes use this to indicate |
| 63 | * the remote end is likely to consume the data we just wrote, and | 58 | * the remote end is likely to consume the data we just wrote, and |
| 64 | * therefore has cache benefit from being placed on the same cpu, see | 59 | * therefore has cache benefit from being placed on the same cpu, see |
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index b133a28fcde3..33d5384a73a8 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c | |||
| @@ -97,7 +97,7 @@ static void prio_changed_idle(struct rq *rq, struct task_struct *p, | |||
| 97 | check_preempt_curr(rq, p, 0); | 97 | check_preempt_curr(rq, p, 0); |
| 98 | } | 98 | } |
| 99 | 99 | ||
| 100 | unsigned int get_rr_interval_idle(struct task_struct *task) | 100 | unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task) |
| 101 | { | 101 | { |
| 102 | return 0; | 102 | return 0; |
| 103 | } | 103 | } |
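get_rr_interval_fair() above, this idle variant, and the RT variant in the next hunk all gain a struct rq * parameter: the caller already holds the runqueue lock and passes the runqueue in, so the callbacks drop their own task_rq_lock()/task_rq_unlock() pair. A userspace sketch of that lock-hoisting idea using pthreads (compile with -pthread); "struct runqueue" and the function names are invented for illustration:

#include <pthread.h>
#include <stdio.h>

/*
 * Sketch of moving locking from the callee into the caller: the callback
 * is documented to run with rq->lock held, so it no longer takes the lock
 * itself. Not kernel code.
 */
struct runqueue {
	pthread_mutex_t lock;
	unsigned long load;
};

/* New-style callback: the caller guarantees rq->lock is held. */
static unsigned int rr_interval(struct runqueue *rq)
{
	return rq->load ? 100 : 0;	/* a slice only if the queue is busy */
}

static unsigned int query_rr_interval(struct runqueue *rq)
{
	unsigned int ret;

	pthread_mutex_lock(&rq->lock);	/* taken once, in the caller */
	ret = rr_interval(rq);
	pthread_mutex_unlock(&rq->lock);
	return ret;
}

int main(void)
{
	struct runqueue rq = { PTHREAD_MUTEX_INITIALIZER, 3 };

	printf("rr interval: %u\n", query_rr_interval(&rq));
	return 0;
}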
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 5c5fef378415..aecbd9c6b20c 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
| @@ -1721,7 +1721,7 @@ static void set_curr_task_rt(struct rq *rq) | |||
| 1721 | dequeue_pushable_task(rq, p); | 1721 | dequeue_pushable_task(rq, p); |
| 1722 | } | 1722 | } |
| 1723 | 1723 | ||
| 1724 | unsigned int get_rr_interval_rt(struct task_struct *task) | 1724 | unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task) |
| 1725 | { | 1725 | { |
| 1726 | /* | 1726 | /* |
| 1727 | * Time slice is 0 for SCHED_FIFO tasks | 1727 | * Time slice is 0 for SCHED_FIFO tasks |
diff --git a/kernel/smp.c b/kernel/smp.c index a8c76069cf50..00a1d0ede532 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
| @@ -171,7 +171,7 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait) | |||
| 171 | void generic_smp_call_function_interrupt(void) | 171 | void generic_smp_call_function_interrupt(void) |
| 172 | { | 172 | { |
| 173 | struct call_function_data *data; | 173 | struct call_function_data *data; |
| 174 | int cpu = get_cpu(); | 174 | int cpu = smp_processor_id(); |
| 175 | 175 | ||
| 176 | /* | 176 | /* |
| 177 | * Shouldn't receive this interrupt on a cpu that is not yet online. | 177 | * Shouldn't receive this interrupt on a cpu that is not yet online. |
| @@ -212,7 +212,6 @@ void generic_smp_call_function_interrupt(void) | |||
| 212 | csd_unlock(&data->csd); | 212 | csd_unlock(&data->csd); |
| 213 | } | 213 | } |
| 214 | 214 | ||
| 215 | put_cpu(); | ||
| 216 | } | 215 | } |
| 217 | 216 | ||
| 218 | /* | 217 | /* |
diff --git a/kernel/softirq.c b/kernel/softirq.c index 21939d9e830e..a09502e2ef75 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
| @@ -697,7 +697,7 @@ void __init softirq_init(void) | |||
| 697 | open_softirq(HI_SOFTIRQ, tasklet_hi_action); | 697 | open_softirq(HI_SOFTIRQ, tasklet_hi_action); |
| 698 | } | 698 | } |
| 699 | 699 | ||
| 700 | static int ksoftirqd(void * __bind_cpu) | 700 | static int run_ksoftirqd(void * __bind_cpu) |
| 701 | { | 701 | { |
| 702 | set_current_state(TASK_INTERRUPTIBLE); | 702 | set_current_state(TASK_INTERRUPTIBLE); |
| 703 | 703 | ||
| @@ -810,7 +810,7 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, | |||
| 810 | switch (action) { | 810 | switch (action) { |
| 811 | case CPU_UP_PREPARE: | 811 | case CPU_UP_PREPARE: |
| 812 | case CPU_UP_PREPARE_FROZEN: | 812 | case CPU_UP_PREPARE_FROZEN: |
| 813 | p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu); | 813 | p = kthread_create(run_ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu); |
| 814 | if (IS_ERR(p)) { | 814 | if (IS_ERR(p)) { |
| 815 | printk("ksoftirqd for %i failed\n", hotcpu); | 815 | printk("ksoftirqd for %i failed\n", hotcpu); |
| 816 | return NOTIFY_BAD; | 816 | return NOTIFY_BAD; |
diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 81324d12eb35..d22579087e27 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c | |||
| @@ -22,9 +22,9 @@ | |||
| 22 | 22 | ||
| 23 | static DEFINE_SPINLOCK(print_lock); | 23 | static DEFINE_SPINLOCK(print_lock); |
| 24 | 24 | ||
| 25 | static DEFINE_PER_CPU(unsigned long, touch_timestamp); | 25 | static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */ |
| 26 | static DEFINE_PER_CPU(unsigned long, print_timestamp); | 26 | static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */ |
| 27 | static DEFINE_PER_CPU(struct task_struct *, watchdog_task); | 27 | static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); |
| 28 | 28 | ||
| 29 | static int __read_mostly did_panic; | 29 | static int __read_mostly did_panic; |
| 30 | int __read_mostly softlockup_thresh = 60; | 30 | int __read_mostly softlockup_thresh = 60; |
| @@ -70,12 +70,12 @@ static void __touch_softlockup_watchdog(void) | |||
| 70 | { | 70 | { |
| 71 | int this_cpu = raw_smp_processor_id(); | 71 | int this_cpu = raw_smp_processor_id(); |
| 72 | 72 | ||
| 73 | __raw_get_cpu_var(touch_timestamp) = get_timestamp(this_cpu); | 73 | __raw_get_cpu_var(softlockup_touch_ts) = get_timestamp(this_cpu); |
| 74 | } | 74 | } |
| 75 | 75 | ||
| 76 | void touch_softlockup_watchdog(void) | 76 | void touch_softlockup_watchdog(void) |
| 77 | { | 77 | { |
| 78 | __raw_get_cpu_var(touch_timestamp) = 0; | 78 | __raw_get_cpu_var(softlockup_touch_ts) = 0; |
| 79 | } | 79 | } |
| 80 | EXPORT_SYMBOL(touch_softlockup_watchdog); | 80 | EXPORT_SYMBOL(touch_softlockup_watchdog); |
| 81 | 81 | ||
| @@ -85,7 +85,7 @@ void touch_all_softlockup_watchdogs(void) | |||
| 85 | 85 | ||
| 86 | /* Cause each CPU to re-update its timestamp rather than complain */ | 86 | /* Cause each CPU to re-update its timestamp rather than complain */ |
| 87 | for_each_online_cpu(cpu) | 87 | for_each_online_cpu(cpu) |
| 88 | per_cpu(touch_timestamp, cpu) = 0; | 88 | per_cpu(softlockup_touch_ts, cpu) = 0; |
| 89 | } | 89 | } |
| 90 | EXPORT_SYMBOL(touch_all_softlockup_watchdogs); | 90 | EXPORT_SYMBOL(touch_all_softlockup_watchdogs); |
| 91 | 91 | ||
| @@ -104,28 +104,28 @@ int proc_dosoftlockup_thresh(struct ctl_table *table, int write, | |||
| 104 | void softlockup_tick(void) | 104 | void softlockup_tick(void) |
| 105 | { | 105 | { |
| 106 | int this_cpu = smp_processor_id(); | 106 | int this_cpu = smp_processor_id(); |
| 107 | unsigned long touch_timestamp = per_cpu(touch_timestamp, this_cpu); | 107 | unsigned long touch_ts = per_cpu(softlockup_touch_ts, this_cpu); |
| 108 | unsigned long print_timestamp; | 108 | unsigned long print_ts; |
| 109 | struct pt_regs *regs = get_irq_regs(); | 109 | struct pt_regs *regs = get_irq_regs(); |
| 110 | unsigned long now; | 110 | unsigned long now; |
| 111 | 111 | ||
| 112 | /* Is detection switched off? */ | 112 | /* Is detection switched off? */ |
| 113 | if (!per_cpu(watchdog_task, this_cpu) || softlockup_thresh <= 0) { | 113 | if (!per_cpu(softlockup_watchdog, this_cpu) || softlockup_thresh <= 0) { |
| 114 | /* Be sure we don't false trigger if switched back on */ | 114 | /* Be sure we don't false trigger if switched back on */ |
| 115 | if (touch_timestamp) | 115 | if (touch_ts) |
| 116 | per_cpu(touch_timestamp, this_cpu) = 0; | 116 | per_cpu(softlockup_touch_ts, this_cpu) = 0; |
| 117 | return; | 117 | return; |
| 118 | } | 118 | } |
| 119 | 119 | ||
| 120 | if (touch_timestamp == 0) { | 120 | if (touch_ts == 0) { |
| 121 | __touch_softlockup_watchdog(); | 121 | __touch_softlockup_watchdog(); |
| 122 | return; | 122 | return; |
| 123 | } | 123 | } |
| 124 | 124 | ||
| 125 | print_timestamp = per_cpu(print_timestamp, this_cpu); | 125 | print_ts = per_cpu(softlockup_print_ts, this_cpu); |
| 126 | 126 | ||
| 127 | /* report at most once a second */ | 127 | /* report at most once a second */ |
| 128 | if (print_timestamp == touch_timestamp || did_panic) | 128 | if (print_ts == touch_ts || did_panic) |
| 129 | return; | 129 | return; |
| 130 | 130 | ||
| 131 | /* do not print during early bootup: */ | 131 | /* do not print during early bootup: */ |
| @@ -140,18 +140,18 @@ void softlockup_tick(void) | |||
| 140 | * Wake up the high-prio watchdog task twice per | 140 | * Wake up the high-prio watchdog task twice per |
| 141 | * threshold timespan. | 141 | * threshold timespan. |
| 142 | */ | 142 | */ |
| 143 | if (now > touch_timestamp + softlockup_thresh/2) | 143 | if (now > touch_ts + softlockup_thresh/2) |
| 144 | wake_up_process(per_cpu(watchdog_task, this_cpu)); | 144 | wake_up_process(per_cpu(softlockup_watchdog, this_cpu)); |
| 145 | 145 | ||
| 146 | /* Warn about unreasonable delays: */ | 146 | /* Warn about unreasonable delays: */ |
| 147 | if (now <= (touch_timestamp + softlockup_thresh)) | 147 | if (now <= (touch_ts + softlockup_thresh)) |
| 148 | return; | 148 | return; |
| 149 | 149 | ||
| 150 | per_cpu(print_timestamp, this_cpu) = touch_timestamp; | 150 | per_cpu(softlockup_print_ts, this_cpu) = touch_ts; |
| 151 | 151 | ||
| 152 | spin_lock(&print_lock); | 152 | spin_lock(&print_lock); |
| 153 | printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n", | 153 | printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n", |
| 154 | this_cpu, now - touch_timestamp, | 154 | this_cpu, now - touch_ts, |
| 155 | current->comm, task_pid_nr(current)); | 155 | current->comm, task_pid_nr(current)); |
| 156 | print_modules(); | 156 | print_modules(); |
| 157 | print_irqtrace_events(current); | 157 | print_irqtrace_events(current); |
| @@ -209,32 +209,32 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
| 209 | switch (action) { | 209 | switch (action) { |
| 210 | case CPU_UP_PREPARE: | 210 | case CPU_UP_PREPARE: |
| 211 | case CPU_UP_PREPARE_FROZEN: | 211 | case CPU_UP_PREPARE_FROZEN: |
| 212 | BUG_ON(per_cpu(watchdog_task, hotcpu)); | 212 | BUG_ON(per_cpu(softlockup_watchdog, hotcpu)); |
| 213 | p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu); | 213 | p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu); |
| 214 | if (IS_ERR(p)) { | 214 | if (IS_ERR(p)) { |
| 215 | printk(KERN_ERR "watchdog for %i failed\n", hotcpu); | 215 | printk(KERN_ERR "watchdog for %i failed\n", hotcpu); |
| 216 | return NOTIFY_BAD; | 216 | return NOTIFY_BAD; |
| 217 | } | 217 | } |
| 218 | per_cpu(touch_timestamp, hotcpu) = 0; | 218 | per_cpu(softlockup_touch_ts, hotcpu) = 0; |
| 219 | per_cpu(watchdog_task, hotcpu) = p; | 219 | per_cpu(softlockup_watchdog, hotcpu) = p; |
| 220 | kthread_bind(p, hotcpu); | 220 | kthread_bind(p, hotcpu); |
| 221 | break; | 221 | break; |
| 222 | case CPU_ONLINE: | 222 | case CPU_ONLINE: |
| 223 | case CPU_ONLINE_FROZEN: | 223 | case CPU_ONLINE_FROZEN: |
| 224 | wake_up_process(per_cpu(watchdog_task, hotcpu)); | 224 | wake_up_process(per_cpu(softlockup_watchdog, hotcpu)); |
| 225 | break; | 225 | break; |
| 226 | #ifdef CONFIG_HOTPLUG_CPU | 226 | #ifdef CONFIG_HOTPLUG_CPU |
| 227 | case CPU_UP_CANCELED: | 227 | case CPU_UP_CANCELED: |
| 228 | case CPU_UP_CANCELED_FROZEN: | 228 | case CPU_UP_CANCELED_FROZEN: |
| 229 | if (!per_cpu(watchdog_task, hotcpu)) | 229 | if (!per_cpu(softlockup_watchdog, hotcpu)) |
| 230 | break; | 230 | break; |
| 231 | /* Unbind so it can run. Fall thru. */ | 231 | /* Unbind so it can run. Fall thru. */ |
| 232 | kthread_bind(per_cpu(watchdog_task, hotcpu), | 232 | kthread_bind(per_cpu(softlockup_watchdog, hotcpu), |
| 233 | cpumask_any(cpu_online_mask)); | 233 | cpumask_any(cpu_online_mask)); |
| 234 | case CPU_DEAD: | 234 | case CPU_DEAD: |
| 235 | case CPU_DEAD_FROZEN: | 235 | case CPU_DEAD_FROZEN: |
| 236 | p = per_cpu(watchdog_task, hotcpu); | 236 | p = per_cpu(softlockup_watchdog, hotcpu); |
| 237 | per_cpu(watchdog_task, hotcpu) = NULL; | 237 | per_cpu(softlockup_watchdog, hotcpu) = NULL; |
| 238 | kthread_stop(p); | 238 | kthread_stop(p); |
| 239 | break; | 239 | break; |
| 240 | #endif /* CONFIG_HOTPLUG_CPU */ | 240 | #endif /* CONFIG_HOTPLUG_CPU */ |
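The softlockup.c hunks above, like the run_ksoftirqd rename earlier and the hwb_tracer and tstats_lookup_lock renames further down, give per-CPU symbols a subsystem prefix because per-CPU variables share a single global namespace. A tiny userspace sketch of the naming idea, with a plain array standing in for DEFINE_PER_CPU():

#include <stdio.h>

/*
 * Illustration only: generic names such as "tracer" or "touch_timestamp"
 * risk colliding with identically named symbols elsewhere, so each
 * subsystem prefixes its per-CPU data. NCPUS and the arrays below are a
 * stand-in for DEFINE_PER_CPU(), not kernel code.
 */
#define NCPUS 4

static unsigned long softlockup_touch_ts[NCPUS];	/* was touch_timestamp */
static unsigned long softlockup_print_ts[NCPUS];	/* was print_timestamp */

static void touch_watchdog(int cpu, unsigned long now)
{
	softlockup_touch_ts[cpu] = now;
}

int main(void)
{
	touch_watchdog(0, 12345);
	printf("cpu0 touch ts: %lu, print ts: %lu\n",
	       softlockup_touch_ts[0], softlockup_print_ts[0]);
	return 0;
}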
diff --git a/kernel/sys.c b/kernel/sys.c index 585d6cd10040..20ccfb5da6af 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
| @@ -189,10 +189,10 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval) | |||
| 189 | !(user = find_user(who))) | 189 | !(user = find_user(who))) |
| 190 | goto out_unlock; /* No processes for this user */ | 190 | goto out_unlock; /* No processes for this user */ |
| 191 | 191 | ||
| 192 | do_each_thread(g, p) | 192 | do_each_thread(g, p) { |
| 193 | if (__task_cred(p)->uid == who) | 193 | if (__task_cred(p)->uid == who) |
| 194 | error = set_one_prio(p, niceval, error); | 194 | error = set_one_prio(p, niceval, error); |
| 195 | while_each_thread(g, p); | 195 | } while_each_thread(g, p); |
| 196 | if (who != cred->uid) | 196 | if (who != cred->uid) |
| 197 | free_uid(user); /* For find_user() */ | 197 | free_uid(user); /* For find_user() */ |
| 198 | break; | 198 | break; |
| @@ -252,13 +252,13 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who) | |||
| 252 | !(user = find_user(who))) | 252 | !(user = find_user(who))) |
| 253 | goto out_unlock; /* No processes for this user */ | 253 | goto out_unlock; /* No processes for this user */ |
| 254 | 254 | ||
| 255 | do_each_thread(g, p) | 255 | do_each_thread(g, p) { |
| 256 | if (__task_cred(p)->uid == who) { | 256 | if (__task_cred(p)->uid == who) { |
| 257 | niceval = 20 - task_nice(p); | 257 | niceval = 20 - task_nice(p); |
| 258 | if (niceval > retval) | 258 | if (niceval > retval) |
| 259 | retval = niceval; | 259 | retval = niceval; |
| 260 | } | 260 | } |
| 261 | while_each_thread(g, p); | 261 | } while_each_thread(g, p); |
| 262 | if (who != cred->uid) | 262 | if (who != cred->uid) |
| 263 | free_uid(user); /* for find_user() */ | 263 | free_uid(user); /* for find_user() */ |
| 264 | break; | 264 | break; |
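The setpriority()/getpriority() hunks above only add braces around the do_each_thread() body. Because that iterator is a macro pair whose opening half ends in "do" and whose closing half supplies the matching "while", an explicitly braced body stays unambiguous as statements are added. A runnable sketch with simplified stand-in macros (not the kernel definitions):

#include <stdio.h>

/*
 * do_each_pair()/while_each_pair() mimic the shape of the kernel's
 * do_each_thread()/while_each_thread(): an outer for loop whose body is a
 * do-while supplied by the closing macro. The braced body between the two
 * macros is what the sys.c change above makes explicit.
 */
#define do_each_pair(i, j, n)	\
	for ((i) = 0, (j) = 0; (i) < (n); (i)++, (j) = 0) do

#define while_each_pair(j, m)	while (++(j) < (m))

int main(void)
{
	int i, j, visited = 0;

	do_each_pair(i, j, 2) {
		visited++;		/* braced body, like the sys.c hunks */
	} while_each_pair(j, 3);

	printf("visited %d pairs (expect 6)\n", visited);
	return 0;
}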
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9327a26765c5..45e4bef0012a 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -244,6 +244,10 @@ static int min_sched_granularity_ns = 100000; /* 100 usecs */ | |||
| 244 | static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ | 244 | static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ |
| 245 | static int min_wakeup_granularity_ns; /* 0 usecs */ | 245 | static int min_wakeup_granularity_ns; /* 0 usecs */ |
| 246 | static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ | 246 | static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ |
| 247 | static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; | ||
| 248 | static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1; | ||
| 249 | static int min_sched_shares_ratelimit = 100000; /* 100 usec */ | ||
| 250 | static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */ | ||
| 247 | #endif | 251 | #endif |
| 248 | 252 | ||
| 249 | static struct ctl_table kern_table[] = { | 253 | static struct ctl_table kern_table[] = { |
| @@ -260,7 +264,7 @@ static struct ctl_table kern_table[] = { | |||
| 260 | .data = &sysctl_sched_min_granularity, | 264 | .data = &sysctl_sched_min_granularity, |
| 261 | .maxlen = sizeof(unsigned int), | 265 | .maxlen = sizeof(unsigned int), |
| 262 | .mode = 0644, | 266 | .mode = 0644, |
| 263 | .proc_handler = sched_nr_latency_handler, | 267 | .proc_handler = sched_proc_update_handler, |
| 264 | .extra1 = &min_sched_granularity_ns, | 268 | .extra1 = &min_sched_granularity_ns, |
| 265 | .extra2 = &max_sched_granularity_ns, | 269 | .extra2 = &max_sched_granularity_ns, |
| 266 | }, | 270 | }, |
| @@ -269,7 +273,7 @@ static struct ctl_table kern_table[] = { | |||
| 269 | .data = &sysctl_sched_latency, | 273 | .data = &sysctl_sched_latency, |
| 270 | .maxlen = sizeof(unsigned int), | 274 | .maxlen = sizeof(unsigned int), |
| 271 | .mode = 0644, | 275 | .mode = 0644, |
| 272 | .proc_handler = sched_nr_latency_handler, | 276 | .proc_handler = sched_proc_update_handler, |
| 273 | .extra1 = &min_sched_granularity_ns, | 277 | .extra1 = &min_sched_granularity_ns, |
| 274 | .extra2 = &max_sched_granularity_ns, | 278 | .extra2 = &max_sched_granularity_ns, |
| 275 | }, | 279 | }, |
| @@ -278,7 +282,7 @@ static struct ctl_table kern_table[] = { | |||
| 278 | .data = &sysctl_sched_wakeup_granularity, | 282 | .data = &sysctl_sched_wakeup_granularity, |
| 279 | .maxlen = sizeof(unsigned int), | 283 | .maxlen = sizeof(unsigned int), |
| 280 | .mode = 0644, | 284 | .mode = 0644, |
| 281 | .proc_handler = proc_dointvec_minmax, | 285 | .proc_handler = sched_proc_update_handler, |
| 282 | .extra1 = &min_wakeup_granularity_ns, | 286 | .extra1 = &min_wakeup_granularity_ns, |
| 283 | .extra2 = &max_wakeup_granularity_ns, | 287 | .extra2 = &max_wakeup_granularity_ns, |
| 284 | }, | 288 | }, |
| @@ -287,7 +291,18 @@ static struct ctl_table kern_table[] = { | |||
| 287 | .data = &sysctl_sched_shares_ratelimit, | 291 | .data = &sysctl_sched_shares_ratelimit, |
| 288 | .maxlen = sizeof(unsigned int), | 292 | .maxlen = sizeof(unsigned int), |
| 289 | .mode = 0644, | 293 | .mode = 0644, |
| 290 | .proc_handler = proc_dointvec, | 294 | .proc_handler = sched_proc_update_handler, |
| 295 | .extra1 = &min_sched_shares_ratelimit, | ||
| 296 | .extra2 = &max_sched_shares_ratelimit, | ||
| 297 | }, | ||
| 298 | { | ||
| 299 | .procname = "sched_tunable_scaling", | ||
| 300 | .data = &sysctl_sched_tunable_scaling, | ||
| 301 | .maxlen = sizeof(enum sched_tunable_scaling), | ||
| 302 | .mode = 0644, | ||
| 303 | .proc_handler = sched_proc_update_handler, | ||
| 304 | .extra1 = &min_sched_tunable_scaling, | ||
| 305 | .extra2 = &max_sched_tunable_scaling, | ||
| 291 | }, | 306 | }, |
| 292 | { | 307 | { |
| 293 | .procname = "sched_shares_thresh", | 308 | .procname = "sched_shares_thresh", |
| @@ -298,13 +313,6 @@ static struct ctl_table kern_table[] = { | |||
| 298 | .extra1 = &zero, | 313 | .extra1 = &zero, |
| 299 | }, | 314 | }, |
| 300 | { | 315 | { |
| 301 | .procname = "sched_features", | ||
| 302 | .data = &sysctl_sched_features, | ||
| 303 | .maxlen = sizeof(unsigned int), | ||
| 304 | .mode = 0644, | ||
| 305 | .proc_handler = proc_dointvec, | ||
| 306 | }, | ||
| 307 | { | ||
| 308 | .procname = "sched_migration_cost", | 316 | .procname = "sched_migration_cost", |
| 309 | .data = &sysctl_sched_migration_cost, | 317 | .data = &sysctl_sched_migration_cost, |
| 310 | .maxlen = sizeof(unsigned int), | 318 | .maxlen = sizeof(unsigned int), |
| @@ -1043,7 +1051,7 @@ static struct ctl_table vm_table[] = { | |||
| 1043 | .extra2 = &one_hundred, | 1051 | .extra2 = &one_hundred, |
| 1044 | }, | 1052 | }, |
| 1045 | #ifdef CONFIG_HUGETLB_PAGE | 1053 | #ifdef CONFIG_HUGETLB_PAGE |
| 1046 | { | 1054 | { |
| 1047 | .procname = "nr_hugepages", | 1055 | .procname = "nr_hugepages", |
| 1048 | .data = NULL, | 1056 | .data = NULL, |
| 1049 | .maxlen = sizeof(unsigned long), | 1057 | .maxlen = sizeof(unsigned long), |
| @@ -1051,7 +1059,18 @@ static struct ctl_table vm_table[] = { | |||
| 1051 | .proc_handler = hugetlb_sysctl_handler, | 1059 | .proc_handler = hugetlb_sysctl_handler, |
| 1052 | .extra1 = (void *)&hugetlb_zero, | 1060 | .extra1 = (void *)&hugetlb_zero, |
| 1053 | .extra2 = (void *)&hugetlb_infinity, | 1061 | .extra2 = (void *)&hugetlb_infinity, |
| 1054 | }, | 1062 | }, |
| 1063 | #ifdef CONFIG_NUMA | ||
| 1064 | { | ||
| 1065 | .procname = "nr_hugepages_mempolicy", | ||
| 1066 | .data = NULL, | ||
| 1067 | .maxlen = sizeof(unsigned long), | ||
| 1068 | .mode = 0644, | ||
| 1069 | .proc_handler = &hugetlb_mempolicy_sysctl_handler, | ||
| 1070 | .extra1 = (void *)&hugetlb_zero, | ||
| 1071 | .extra2 = (void *)&hugetlb_infinity, | ||
| 1072 | }, | ||
| 1073 | #endif | ||
| 1055 | { | 1074 | { |
| 1056 | .procname = "hugetlb_shm_group", | 1075 | .procname = "hugetlb_shm_group", |
| 1057 | .data = &sysctl_hugetlb_shm_group, | 1076 | .data = &sysctl_hugetlb_shm_group, |
| @@ -1112,7 +1131,8 @@ static struct ctl_table vm_table[] = { | |||
| 1112 | .data = &sysctl_max_map_count, | 1131 | .data = &sysctl_max_map_count, |
| 1113 | .maxlen = sizeof(sysctl_max_map_count), | 1132 | .maxlen = sizeof(sysctl_max_map_count), |
| 1114 | .mode = 0644, | 1133 | .mode = 0644, |
| 1115 | .proc_handler = proc_dointvec | 1134 | .proc_handler = proc_dointvec, |
| 1135 | .extra1 = &zero, | ||
| 1116 | }, | 1136 | }, |
| 1117 | #else | 1137 | #else |
| 1118 | { | 1138 | { |
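Several scheduler sysctls above switch to sched_proc_update_handler and gain .extra1/.extra2 bounds (for example min_sched_tunable_scaling and max_sched_tunable_scaling), so out-of-range writes are rejected at the table. A userspace sketch of that clamp-at-the-table idea; struct tunable and its fields are invented stand-ins for the ctl_table machinery:

#include <stdio.h>

/*
 * Each entry carries its own bounds and a write path that rejects values
 * outside them, analogous to .extra1/.extra2 in the hunks above.
 */
struct tunable {
	const char *name;
	int *data;
	int *min;	/* plays the role of .extra1 */
	int *max;	/* plays the role of .extra2 */
};

static int tunable_write(struct tunable *t, int val)
{
	if ((t->min && val < *t->min) || (t->max && val > *t->max))
		return -1;	/* out of range: reject, like -EINVAL */
	*t->data = val;
	return 0;
}

int main(void)
{
	int scaling = 1, min_scaling = 0, max_scaling = 2;
	struct tunable t = { "sched_tunable_scaling", &scaling,
			     &min_scaling, &max_scaling };

	printf("write 2 -> %d (accepted)\n", tunable_write(&t, 2));
	printf("write 7 -> %d (rejected)\n", tunable_write(&t, 7));
	printf("value is now %d\n", scaling);
	return 0;
}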
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 665c76edbf17..9d80db4747d4 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c | |||
| @@ -150,6 +150,9 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now) | |||
| 150 | P_ns(expires_next); | 150 | P_ns(expires_next); |
| 151 | P(hres_active); | 151 | P(hres_active); |
| 152 | P(nr_events); | 152 | P(nr_events); |
| 153 | P(nr_retries); | ||
| 154 | P(nr_hangs); | ||
| 155 | P_ns(max_hang_time); | ||
| 153 | #endif | 156 | #endif |
| 154 | #undef P | 157 | #undef P |
| 155 | #undef P_ns | 158 | #undef P_ns |
| @@ -254,7 +257,7 @@ static int timer_list_show(struct seq_file *m, void *v) | |||
| 254 | u64 now = ktime_to_ns(ktime_get()); | 257 | u64 now = ktime_to_ns(ktime_get()); |
| 255 | int cpu; | 258 | int cpu; |
| 256 | 259 | ||
| 257 | SEQ_printf(m, "Timer List Version: v0.4\n"); | 260 | SEQ_printf(m, "Timer List Version: v0.5\n"); |
| 258 | SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); | 261 | SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); |
| 259 | SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); | 262 | SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); |
| 260 | 263 | ||
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c index ee5681f8d7ec..63b117e9eba1 100644 --- a/kernel/time/timer_stats.c +++ b/kernel/time/timer_stats.c | |||
| @@ -86,7 +86,7 @@ static DEFINE_SPINLOCK(table_lock); | |||
| 86 | /* | 86 | /* |
| 87 | * Per-CPU lookup locks for fast hash lookup: | 87 | * Per-CPU lookup locks for fast hash lookup: |
| 88 | */ | 88 | */ |
| 89 | static DEFINE_PER_CPU(spinlock_t, lookup_lock); | 89 | static DEFINE_PER_CPU(spinlock_t, tstats_lookup_lock); |
| 90 | 90 | ||
| 91 | /* | 91 | /* |
| 92 | * Mutex to serialize state changes with show-stats activities: | 92 | * Mutex to serialize state changes with show-stats activities: |
| @@ -245,7 +245,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf, | |||
| 245 | if (likely(!timer_stats_active)) | 245 | if (likely(!timer_stats_active)) |
| 246 | return; | 246 | return; |
| 247 | 247 | ||
| 248 | lock = &per_cpu(lookup_lock, raw_smp_processor_id()); | 248 | lock = &per_cpu(tstats_lookup_lock, raw_smp_processor_id()); |
| 249 | 249 | ||
| 250 | input.timer = timer; | 250 | input.timer = timer; |
| 251 | input.start_func = startf; | 251 | input.start_func = startf; |
| @@ -348,9 +348,10 @@ static void sync_access(void) | |||
| 348 | int cpu; | 348 | int cpu; |
| 349 | 349 | ||
| 350 | for_each_online_cpu(cpu) { | 350 | for_each_online_cpu(cpu) { |
| 351 | spin_lock_irqsave(&per_cpu(lookup_lock, cpu), flags); | 351 | spinlock_t *lock = &per_cpu(tstats_lookup_lock, cpu); |
| 352 | spin_lock_irqsave(lock, flags); | ||
| 352 | /* nothing */ | 353 | /* nothing */ |
| 353 | spin_unlock_irqrestore(&per_cpu(lookup_lock, cpu), flags); | 354 | spin_unlock_irqrestore(lock, flags); |
| 354 | } | 355 | } |
| 355 | } | 356 | } |
| 356 | 357 | ||
| @@ -408,7 +409,7 @@ void __init init_timer_stats(void) | |||
| 408 | int cpu; | 409 | int cpu; |
| 409 | 410 | ||
| 410 | for_each_possible_cpu(cpu) | 411 | for_each_possible_cpu(cpu) |
| 411 | spin_lock_init(&per_cpu(lookup_lock, cpu)); | 412 | spin_lock_init(&per_cpu(tstats_lookup_lock, cpu)); |
| 412 | } | 413 | } |
| 413 | 414 | ||
| 414 | static int __init init_tstats_procfs(void) | 415 | static int __init init_tstats_procfs(void) |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 874f2893cff0..c82dfd92fdfd 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
| @@ -86,17 +86,17 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set) | |||
| 86 | */ | 86 | */ |
| 87 | static int tracing_disabled = 1; | 87 | static int tracing_disabled = 1; |
| 88 | 88 | ||
| 89 | DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); | 89 | DEFINE_PER_CPU(int, ftrace_cpu_disabled); |
| 90 | 90 | ||
| 91 | static inline void ftrace_disable_cpu(void) | 91 | static inline void ftrace_disable_cpu(void) |
| 92 | { | 92 | { |
| 93 | preempt_disable(); | 93 | preempt_disable(); |
| 94 | local_inc(&__get_cpu_var(ftrace_cpu_disabled)); | 94 | __this_cpu_inc(per_cpu_var(ftrace_cpu_disabled)); |
| 95 | } | 95 | } |
| 96 | 96 | ||
| 97 | static inline void ftrace_enable_cpu(void) | 97 | static inline void ftrace_enable_cpu(void) |
| 98 | { | 98 | { |
| 99 | local_dec(&__get_cpu_var(ftrace_cpu_disabled)); | 99 | __this_cpu_dec(per_cpu_var(ftrace_cpu_disabled)); |
| 100 | preempt_enable(); | 100 | preempt_enable(); |
| 101 | } | 101 | } |
| 102 | 102 | ||
| @@ -203,7 +203,7 @@ cycle_t ftrace_now(int cpu) | |||
| 203 | */ | 203 | */ |
| 204 | static struct trace_array max_tr; | 204 | static struct trace_array max_tr; |
| 205 | 205 | ||
| 206 | static DEFINE_PER_CPU(struct trace_array_cpu, max_data); | 206 | static DEFINE_PER_CPU(struct trace_array_cpu, max_tr_data); |
| 207 | 207 | ||
| 208 | /* tracer_enabled is used to toggle activation of a tracer */ | 208 | /* tracer_enabled is used to toggle activation of a tracer */ |
| 209 | static int tracer_enabled = 1; | 209 | static int tracer_enabled = 1; |
| @@ -1085,7 +1085,7 @@ trace_function(struct trace_array *tr, | |||
| 1085 | struct ftrace_entry *entry; | 1085 | struct ftrace_entry *entry; |
| 1086 | 1086 | ||
| 1087 | /* If we are reading the ring buffer, don't trace */ | 1087 | /* If we are reading the ring buffer, don't trace */ |
| 1088 | if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) | 1088 | if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) |
| 1089 | return; | 1089 | return; |
| 1090 | 1090 | ||
| 1091 | event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), | 1091 | event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), |
| @@ -1361,11 +1361,7 @@ int trace_array_vprintk(struct trace_array *tr, | |||
| 1361 | pause_graph_tracing(); | 1361 | pause_graph_tracing(); |
| 1362 | raw_local_irq_save(irq_flags); | 1362 | raw_local_irq_save(irq_flags); |
| 1363 | __raw_spin_lock(&trace_buf_lock); | 1363 | __raw_spin_lock(&trace_buf_lock); |
| 1364 | if (args == NULL) { | 1364 | len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); |
| 1365 | strncpy(trace_buf, fmt, TRACE_BUF_SIZE); | ||
| 1366 | len = strlen(trace_buf); | ||
| 1367 | } else | ||
| 1368 | len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); | ||
| 1369 | 1365 | ||
| 1370 | size = sizeof(*entry) + len + 1; | 1366 | size = sizeof(*entry) + len + 1; |
| 1371 | buffer = tr->buffer; | 1367 | buffer = tr->buffer; |
| @@ -1516,6 +1512,8 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos) | |||
| 1516 | int i = (int)*pos; | 1512 | int i = (int)*pos; |
| 1517 | void *ent; | 1513 | void *ent; |
| 1518 | 1514 | ||
| 1515 | WARN_ON_ONCE(iter->leftover); | ||
| 1516 | |||
| 1519 | (*pos)++; | 1517 | (*pos)++; |
| 1520 | 1518 | ||
| 1521 | /* can't go backwards */ | 1519 | /* can't go backwards */ |
| @@ -1614,8 +1612,16 @@ static void *s_start(struct seq_file *m, loff_t *pos) | |||
| 1614 | ; | 1612 | ; |
| 1615 | 1613 | ||
| 1616 | } else { | 1614 | } else { |
| 1617 | l = *pos - 1; | 1615 | /* |
| 1618 | p = s_next(m, p, &l); | 1616 | * If we overflowed the seq_file before, then we want |
| 1617 | * to just reuse the trace_seq buffer again. | ||
| 1618 | */ | ||
| 1619 | if (iter->leftover) | ||
| 1620 | p = iter; | ||
| 1621 | else { | ||
| 1622 | l = *pos - 1; | ||
| 1623 | p = s_next(m, p, &l); | ||
| 1624 | } | ||
| 1619 | } | 1625 | } |
| 1620 | 1626 | ||
| 1621 | trace_event_read_lock(); | 1627 | trace_event_read_lock(); |
| @@ -1923,6 +1929,7 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter) | |||
| 1923 | static int s_show(struct seq_file *m, void *v) | 1929 | static int s_show(struct seq_file *m, void *v) |
| 1924 | { | 1930 | { |
| 1925 | struct trace_iterator *iter = v; | 1931 | struct trace_iterator *iter = v; |
| 1932 | int ret; | ||
| 1926 | 1933 | ||
| 1927 | if (iter->ent == NULL) { | 1934 | if (iter->ent == NULL) { |
| 1928 | if (iter->tr) { | 1935 | if (iter->tr) { |
| @@ -1942,9 +1949,27 @@ static int s_show(struct seq_file *m, void *v) | |||
| 1942 | if (!(trace_flags & TRACE_ITER_VERBOSE)) | 1949 | if (!(trace_flags & TRACE_ITER_VERBOSE)) |
| 1943 | print_func_help_header(m); | 1950 | print_func_help_header(m); |
| 1944 | } | 1951 | } |
| 1952 | } else if (iter->leftover) { | ||
| 1953 | /* | ||
| 1954 | * If we filled the seq_file buffer earlier, we | ||
| 1955 | * want to just show it now. | ||
| 1956 | */ | ||
| 1957 | ret = trace_print_seq(m, &iter->seq); | ||
| 1958 | |||
| 1959 | /* ret should this time be zero, but you never know */ | ||
| 1960 | iter->leftover = ret; | ||
| 1961 | |||
| 1945 | } else { | 1962 | } else { |
| 1946 | print_trace_line(iter); | 1963 | print_trace_line(iter); |
| 1947 | trace_print_seq(m, &iter->seq); | 1964 | ret = trace_print_seq(m, &iter->seq); |
| 1965 | /* | ||
| 1966 | * If we overflow the seq_file buffer, then it will | ||
| 1967 | * ask us for this data again at start up. | ||
| 1968 | * Use that instead. | ||
| 1969 | * ret is 0 if seq_file write succeeded. | ||
| 1970 | * -1 otherwise. | ||
| 1971 | */ | ||
| 1972 | iter->leftover = ret; | ||
| 1948 | } | 1973 | } |
| 1949 | 1974 | ||
| 1950 | return 0; | 1975 | return 0; |
| @@ -2898,6 +2923,10 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) | |||
| 2898 | else | 2923 | else |
| 2899 | cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask); | 2924 | cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask); |
| 2900 | 2925 | ||
| 2926 | |||
| 2927 | if (iter->trace->pipe_close) | ||
| 2928 | iter->trace->pipe_close(iter); | ||
| 2929 | |||
| 2901 | mutex_unlock(&trace_types_lock); | 2930 | mutex_unlock(&trace_types_lock); |
| 2902 | 2931 | ||
| 2903 | free_cpumask_var(iter->started); | 2932 | free_cpumask_var(iter->started); |
| @@ -3320,6 +3349,16 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, | |||
| 3320 | return cnt; | 3349 | return cnt; |
| 3321 | } | 3350 | } |
| 3322 | 3351 | ||
| 3352 | static int mark_printk(const char *fmt, ...) | ||
| 3353 | { | ||
| 3354 | int ret; | ||
| 3355 | va_list args; | ||
| 3356 | va_start(args, fmt); | ||
| 3357 | ret = trace_vprintk(0, fmt, args); | ||
| 3358 | va_end(args); | ||
| 3359 | return ret; | ||
| 3360 | } | ||
| 3361 | |||
| 3323 | static ssize_t | 3362 | static ssize_t |
| 3324 | tracing_mark_write(struct file *filp, const char __user *ubuf, | 3363 | tracing_mark_write(struct file *filp, const char __user *ubuf, |
| 3325 | size_t cnt, loff_t *fpos) | 3364 | size_t cnt, loff_t *fpos) |
| @@ -3346,7 +3385,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, | |||
| 3346 | } else | 3385 | } else |
| 3347 | buf[cnt] = '\0'; | 3386 | buf[cnt] = '\0'; |
| 3348 | 3387 | ||
| 3349 | cnt = trace_vprintk(0, buf, NULL); | 3388 | cnt = mark_printk("%s", buf); |
| 3350 | kfree(buf); | 3389 | kfree(buf); |
| 3351 | *fpos += cnt; | 3390 | *fpos += cnt; |
| 3352 | 3391 | ||
| @@ -4415,7 +4454,7 @@ __init static int tracer_alloc_buffers(void) | |||
| 4415 | /* Allocate the first page for all buffers */ | 4454 | /* Allocate the first page for all buffers */ |
| 4416 | for_each_tracing_cpu(i) { | 4455 | for_each_tracing_cpu(i) { |
| 4417 | global_trace.data[i] = &per_cpu(global_trace_cpu, i); | 4456 | global_trace.data[i] = &per_cpu(global_trace_cpu, i); |
| 4418 | max_tr.data[i] = &per_cpu(max_data, i); | 4457 | max_tr.data[i] = &per_cpu(max_tr_data, i); |
| 4419 | } | 4458 | } |
| 4420 | 4459 | ||
| 4421 | trace_init_cmdlines(); | 4460 | trace_init_cmdlines(); |
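The s_show()/s_start() hunks above record whether trace_print_seq() failed in iter->leftover, so the same entry is re-emitted on the next pass instead of being silently dropped. A small userspace sketch of that retry-on-overflow idea with a fixed-size buffer; the writer structure and item list are illustrative only:

#include <stdio.h>
#include <string.h>

/*
 * If an item does not fit, remember that fact and present the same item
 * again after the buffer has been drained, rather than consuming a new one.
 */
struct writer {
	char buf[16];
	size_t used;
	int leftover;		/* 1 if the last item must be retried */
};

static int emit(struct writer *w, const char *item)
{
	size_t len = strlen(item);

	if (w->used + len > sizeof(w->buf) - 1) {
		w->leftover = 1;	/* did not fit: ask for it again */
		return -1;
	}
	memcpy(w->buf + w->used, item, len);
	w->used += len;
	w->buf[w->used] = '\0';
	w->leftover = 0;
	return 0;
}

int main(void)
{
	static const char *items[] = { "alpha ", "beta ", "gamma ", "delta " };
	struct writer w = { .buf = "" };
	unsigned int i = 0;

	while (i < 4) {
		if (emit(&w, items[i]) < 0) {
			printf("flush: %s\n", w.buf);	/* drain the buffer */
			w.used = 0;			/* then retry item i */
			continue;
		}
		i++;			/* only consume on success */
	}
	printf("flush: %s\n", w.buf);
	return 0;
}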
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 1d7f4830a80d..a52bed2eedd8 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
| @@ -272,6 +272,7 @@ struct tracer_flags { | |||
| 272 | * @pipe_open: called when the trace_pipe file is opened | 272 | * @pipe_open: called when the trace_pipe file is opened |
| 273 | * @wait_pipe: override how the user waits for traces on trace_pipe | 273 | * @wait_pipe: override how the user waits for traces on trace_pipe |
| 274 | * @close: called when the trace file is released | 274 | * @close: called when the trace file is released |
| 275 | * @pipe_close: called when the trace_pipe file is released | ||
| 275 | * @read: override the default read callback on trace_pipe | 276 | * @read: override the default read callback on trace_pipe |
| 276 | * @splice_read: override the default splice_read callback on trace_pipe | 277 | * @splice_read: override the default splice_read callback on trace_pipe |
| 277 | * @selftest: selftest to run on boot (see trace_selftest.c) | 278 | * @selftest: selftest to run on boot (see trace_selftest.c) |
| @@ -290,6 +291,7 @@ struct tracer { | |||
| 290 | void (*pipe_open)(struct trace_iterator *iter); | 291 | void (*pipe_open)(struct trace_iterator *iter); |
| 291 | void (*wait_pipe)(struct trace_iterator *iter); | 292 | void (*wait_pipe)(struct trace_iterator *iter); |
| 292 | void (*close)(struct trace_iterator *iter); | 293 | void (*close)(struct trace_iterator *iter); |
| 294 | void (*pipe_close)(struct trace_iterator *iter); | ||
| 293 | ssize_t (*read)(struct trace_iterator *iter, | 295 | ssize_t (*read)(struct trace_iterator *iter, |
| 294 | struct file *filp, char __user *ubuf, | 296 | struct file *filp, char __user *ubuf, |
| 295 | size_t cnt, loff_t *ppos); | 297 | size_t cnt, loff_t *ppos); |
| @@ -441,7 +443,7 @@ extern int DYN_FTRACE_TEST_NAME(void); | |||
| 441 | 443 | ||
| 442 | extern int ring_buffer_expanded; | 444 | extern int ring_buffer_expanded; |
| 443 | extern bool tracing_selftest_disabled; | 445 | extern bool tracing_selftest_disabled; |
| 444 | DECLARE_PER_CPU(local_t, ftrace_cpu_disabled); | 446 | DECLARE_PER_CPU(int, ftrace_cpu_disabled); |
| 445 | 447 | ||
| 446 | #ifdef CONFIG_FTRACE_STARTUP_TEST | 448 | #ifdef CONFIG_FTRACE_STARTUP_TEST |
| 447 | extern int trace_selftest_startup_function(struct tracer *trace, | 449 | extern int trace_selftest_startup_function(struct tracer *trace, |
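The new ->pipe_close member of struct tracer is an optional hook: the core calls it only when a tracer provides one, as the tracing_release_pipe() hunk earlier shows. A toy sketch of that optional-callback pattern; struct pipe_ops below is invented and is not the kernel's struct tracer:

#include <stdio.h>

/*
 * The release path checks the pointer before calling, so implementations
 * that do not care simply leave the hook NULL.
 */
struct pipe_ops {
	void (*open)(void);
	void (*close)(void);	/* optional: may be NULL */
};

static void release_pipe(const struct pipe_ops *ops)
{
	if (ops->close)		/* only call it when a hook is registered */
		ops->close();
}

static void my_close(void)
{
	printf("pipe_close callback ran\n");
}

int main(void)
{
	struct pipe_ops with = { .open = NULL, .close = my_close };
	struct pipe_ops without = { .open = NULL, .close = NULL };

	release_pipe(&with);
	release_pipe(&without);	/* safe: no callback registered */
	return 0;
}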
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 45e6c01b2e4d..b1342c5d37cf 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c | |||
| @@ -14,9 +14,20 @@ | |||
| 14 | #include "trace.h" | 14 | #include "trace.h" |
| 15 | #include "trace_output.h" | 15 | #include "trace_output.h" |
| 16 | 16 | ||
| 17 | struct fgraph_data { | 17 | struct fgraph_cpu_data { |
| 18 | pid_t last_pid; | 18 | pid_t last_pid; |
| 19 | int depth; | 19 | int depth; |
| 20 | int ignore; | ||
| 21 | }; | ||
| 22 | |||
| 23 | struct fgraph_data { | ||
| 24 | struct fgraph_cpu_data *cpu_data; | ||
| 25 | |||
| 26 | /* Place to preserve last processed entry. */ | ||
| 27 | struct ftrace_graph_ent_entry ent; | ||
| 28 | struct ftrace_graph_ret_entry ret; | ||
| 29 | int failed; | ||
| 30 | int cpu; | ||
| 20 | }; | 31 | }; |
| 21 | 32 | ||
| 22 | #define TRACE_GRAPH_INDENT 2 | 33 | #define TRACE_GRAPH_INDENT 2 |
| @@ -176,7 +187,7 @@ static int __trace_graph_entry(struct trace_array *tr, | |||
| 176 | struct ring_buffer *buffer = tr->buffer; | 187 | struct ring_buffer *buffer = tr->buffer; |
| 177 | struct ftrace_graph_ent_entry *entry; | 188 | struct ftrace_graph_ent_entry *entry; |
| 178 | 189 | ||
| 179 | if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) | 190 | if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) |
| 180 | return 0; | 191 | return 0; |
| 181 | 192 | ||
| 182 | event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, | 193 | event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, |
| @@ -240,7 +251,7 @@ static void __trace_graph_return(struct trace_array *tr, | |||
| 240 | struct ring_buffer *buffer = tr->buffer; | 251 | struct ring_buffer *buffer = tr->buffer; |
| 241 | struct ftrace_graph_ret_entry *entry; | 252 | struct ftrace_graph_ret_entry *entry; |
| 242 | 253 | ||
| 243 | if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) | 254 | if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) |
| 244 | return; | 255 | return; |
| 245 | 256 | ||
| 246 | event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, | 257 | event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, |
| @@ -384,7 +395,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data) | |||
| 384 | if (!data) | 395 | if (!data) |
| 385 | return TRACE_TYPE_HANDLED; | 396 | return TRACE_TYPE_HANDLED; |
| 386 | 397 | ||
| 387 | last_pid = &(per_cpu_ptr(data, cpu)->last_pid); | 398 | last_pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid); |
| 388 | 399 | ||
| 389 | if (*last_pid == pid) | 400 | if (*last_pid == pid) |
| 390 | return TRACE_TYPE_HANDLED; | 401 | return TRACE_TYPE_HANDLED; |
| @@ -435,26 +446,49 @@ static struct ftrace_graph_ret_entry * | |||
| 435 | get_return_for_leaf(struct trace_iterator *iter, | 446 | get_return_for_leaf(struct trace_iterator *iter, |
| 436 | struct ftrace_graph_ent_entry *curr) | 447 | struct ftrace_graph_ent_entry *curr) |
| 437 | { | 448 | { |
| 438 | struct ring_buffer_iter *ring_iter; | 449 | struct fgraph_data *data = iter->private; |
| 450 | struct ring_buffer_iter *ring_iter = NULL; | ||
| 439 | struct ring_buffer_event *event; | 451 | struct ring_buffer_event *event; |
| 440 | struct ftrace_graph_ret_entry *next; | 452 | struct ftrace_graph_ret_entry *next; |
| 441 | 453 | ||
| 442 | ring_iter = iter->buffer_iter[iter->cpu]; | 454 | /* |
| 455 | * If the previous output failed to write to the seq buffer, | ||
| 456 | * then we just reuse the data from before. | ||
| 457 | */ | ||
| 458 | if (data && data->failed) { | ||
| 459 | curr = &data->ent; | ||
| 460 | next = &data->ret; | ||
| 461 | } else { | ||
| 443 | 462 | ||
| 444 | /* First peek to compare current entry and the next one */ | 463 | ring_iter = iter->buffer_iter[iter->cpu]; |
| 445 | if (ring_iter) | 464 | |
| 446 | event = ring_buffer_iter_peek(ring_iter, NULL); | 465 | /* First peek to compare current entry and the next one */ |
| 447 | else { | 466 | if (ring_iter) |
| 448 | /* We need to consume the current entry to see the next one */ | 467 | event = ring_buffer_iter_peek(ring_iter, NULL); |
| 449 | ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL); | 468 | else { |
| 450 | event = ring_buffer_peek(iter->tr->buffer, iter->cpu, | 469 | /* |
| 451 | NULL); | 470 | * We need to consume the current entry to see |
| 452 | } | 471 | * the next one. |
| 472 | */ | ||
| 473 | ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL); | ||
| 474 | event = ring_buffer_peek(iter->tr->buffer, iter->cpu, | ||
| 475 | NULL); | ||
| 476 | } | ||
| 453 | 477 | ||
| 454 | if (!event) | 478 | if (!event) |
| 455 | return NULL; | 479 | return NULL; |
| 480 | |||
| 481 | next = ring_buffer_event_data(event); | ||
| 456 | 482 | ||
| 457 | next = ring_buffer_event_data(event); | 483 | if (data) { |
| 484 | /* | ||
| 485 | * Save current and next entries for later reference | ||
| 486 | * if the output fails. | ||
| 487 | */ | ||
| 488 | data->ent = *curr; | ||
| 489 | data->ret = *next; | ||
| 490 | } | ||
| 491 | } | ||
| 458 | 492 | ||
| 459 | if (next->ent.type != TRACE_GRAPH_RET) | 493 | if (next->ent.type != TRACE_GRAPH_RET) |
| 460 | return NULL; | 494 | return NULL; |
| @@ -640,7 +674,7 @@ print_graph_entry_leaf(struct trace_iterator *iter, | |||
| 640 | 674 | ||
| 641 | if (data) { | 675 | if (data) { |
| 642 | int cpu = iter->cpu; | 676 | int cpu = iter->cpu; |
| 643 | int *depth = &(per_cpu_ptr(data, cpu)->depth); | 677 | int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); |
| 644 | 678 | ||
| 645 | /* | 679 | /* |
| 646 | * Comments display at + 1 to depth. Since | 680 | * Comments display at + 1 to depth. Since |
| @@ -688,7 +722,7 @@ print_graph_entry_nested(struct trace_iterator *iter, | |||
| 688 | 722 | ||
| 689 | if (data) { | 723 | if (data) { |
| 690 | int cpu = iter->cpu; | 724 | int cpu = iter->cpu; |
| 691 | int *depth = &(per_cpu_ptr(data, cpu)->depth); | 725 | int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); |
| 692 | 726 | ||
| 693 | *depth = call->depth; | 727 | *depth = call->depth; |
| 694 | } | 728 | } |
| @@ -782,19 +816,34 @@ static enum print_line_t | |||
| 782 | print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, | 816 | print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, |
| 783 | struct trace_iterator *iter) | 817 | struct trace_iterator *iter) |
| 784 | { | 818 | { |
| 785 | int cpu = iter->cpu; | 819 | struct fgraph_data *data = iter->private; |
| 786 | struct ftrace_graph_ent *call = &field->graph_ent; | 820 | struct ftrace_graph_ent *call = &field->graph_ent; |
| 787 | struct ftrace_graph_ret_entry *leaf_ret; | 821 | struct ftrace_graph_ret_entry *leaf_ret; |
| 822 | static enum print_line_t ret; | ||
| 823 | int cpu = iter->cpu; | ||
| 788 | 824 | ||
| 789 | if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func)) | 825 | if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func)) |
| 790 | return TRACE_TYPE_PARTIAL_LINE; | 826 | return TRACE_TYPE_PARTIAL_LINE; |
| 791 | 827 | ||
| 792 | leaf_ret = get_return_for_leaf(iter, field); | 828 | leaf_ret = get_return_for_leaf(iter, field); |
| 793 | if (leaf_ret) | 829 | if (leaf_ret) |
| 794 | return print_graph_entry_leaf(iter, field, leaf_ret, s); | 830 | ret = print_graph_entry_leaf(iter, field, leaf_ret, s); |
| 795 | else | 831 | else |
| 796 | return print_graph_entry_nested(iter, field, s, cpu); | 832 | ret = print_graph_entry_nested(iter, field, s, cpu); |
| 797 | 833 | ||
| 834 | if (data) { | ||
| 835 | /* | ||
| 836 | * If we failed to write our output, then we need to make | ||
| 837 | * note of it. Because we already consumed our entry. | ||
| 838 | */ | ||
| 839 | if (s->full) { | ||
| 840 | data->failed = 1; | ||
| 841 | data->cpu = cpu; | ||
| 842 | } else | ||
| 843 | data->failed = 0; | ||
| 844 | } | ||
| 845 | |||
| 846 | return ret; | ||
| 798 | } | 847 | } |
| 799 | 848 | ||
| 800 | static enum print_line_t | 849 | static enum print_line_t |
| @@ -810,7 +859,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, | |||
| 810 | 859 | ||
| 811 | if (data) { | 860 | if (data) { |
| 812 | int cpu = iter->cpu; | 861 | int cpu = iter->cpu; |
| 813 | int *depth = &(per_cpu_ptr(data, cpu)->depth); | 862 | int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); |
| 814 | 863 | ||
| 815 | /* | 864 | /* |
| 816 | * Comments display at + 1 to depth. This is the | 865 | * Comments display at + 1 to depth. This is the |
| @@ -873,7 +922,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, | |||
| 873 | int i; | 922 | int i; |
| 874 | 923 | ||
| 875 | if (data) | 924 | if (data) |
| 876 | depth = per_cpu_ptr(data, iter->cpu)->depth; | 925 | depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth; |
| 877 | 926 | ||
| 878 | if (print_graph_prologue(iter, s, 0, 0)) | 927 | if (print_graph_prologue(iter, s, 0, 0)) |
| 879 | return TRACE_TYPE_PARTIAL_LINE; | 928 | return TRACE_TYPE_PARTIAL_LINE; |
| @@ -941,8 +990,33 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, | |||
| 941 | enum print_line_t | 990 | enum print_line_t |
| 942 | print_graph_function(struct trace_iterator *iter) | 991 | print_graph_function(struct trace_iterator *iter) |
| 943 | { | 992 | { |
| 993 | struct ftrace_graph_ent_entry *field; | ||
| 994 | struct fgraph_data *data = iter->private; | ||
| 944 | struct trace_entry *entry = iter->ent; | 995 | struct trace_entry *entry = iter->ent; |
| 945 | struct trace_seq *s = &iter->seq; | 996 | struct trace_seq *s = &iter->seq; |
| 997 | int cpu = iter->cpu; | ||
| 998 | int ret; | ||
| 999 | |||
| 1000 | if (data && per_cpu_ptr(data->cpu_data, cpu)->ignore) { | ||
| 1001 | per_cpu_ptr(data->cpu_data, cpu)->ignore = 0; | ||
| 1002 | return TRACE_TYPE_HANDLED; | ||
| 1003 | } | ||
| 1004 | |||
| 1005 | /* | ||
| 1006 | * If the last output failed, there's a possibility we need | ||
| 1007 | * to print out the missing entry which would never go out. | ||
| 1008 | */ | ||
| 1009 | if (data && data->failed) { | ||
| 1010 | field = &data->ent; | ||
| 1011 | iter->cpu = data->cpu; | ||
| 1012 | ret = print_graph_entry(field, s, iter); | ||
| 1013 | if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) { | ||
| 1014 | per_cpu_ptr(data->cpu_data, iter->cpu)->ignore = 1; | ||
| 1015 | ret = TRACE_TYPE_NO_CONSUME; | ||
| 1016 | } | ||
| 1017 | iter->cpu = cpu; | ||
| 1018 | return ret; | ||
| 1019 | } | ||
| 946 | 1020 | ||
| 947 | switch (entry->type) { | 1021 | switch (entry->type) { |
| 948 | case TRACE_GRAPH_ENT: { | 1022 | case TRACE_GRAPH_ENT: { |
| @@ -952,7 +1026,7 @@ print_graph_function(struct trace_iterator *iter) | |||
| 952 | * sizeof(struct ftrace_graph_ent_entry) is very small, | 1026 | * sizeof(struct ftrace_graph_ent_entry) is very small, |
| 953 | * it can be safely saved at the stack. | 1027 | * it can be safely saved at the stack. |
| 954 | */ | 1028 | */ |
| 955 | struct ftrace_graph_ent_entry *field, saved; | 1029 | struct ftrace_graph_ent_entry saved; |
| 956 | trace_assign_type(field, entry); | 1030 | trace_assign_type(field, entry); |
| 957 | saved = *field; | 1031 | saved = *field; |
| 958 | return print_graph_entry(&saved, s, iter); | 1032 | return print_graph_entry(&saved, s, iter); |
| @@ -1030,31 +1104,54 @@ static void print_graph_headers(struct seq_file *s) | |||
| 1030 | static void graph_trace_open(struct trace_iterator *iter) | 1104 | static void graph_trace_open(struct trace_iterator *iter) |
| 1031 | { | 1105 | { |
| 1032 | /* pid and depth on the last trace processed */ | 1106 | /* pid and depth on the last trace processed */ |
| 1033 | struct fgraph_data *data = alloc_percpu(struct fgraph_data); | 1107 | struct fgraph_data *data; |
| 1034 | int cpu; | 1108 | int cpu; |
| 1035 | 1109 | ||
| 1110 | iter->private = NULL; | ||
| 1111 | |||
| 1112 | data = kzalloc(sizeof(*data), GFP_KERNEL); | ||
| 1036 | if (!data) | 1113 | if (!data) |
| 1037 | pr_warning("function graph tracer: not enough memory\n"); | 1114 | goto out_err; |
| 1038 | else | 1115 | |
| 1039 | for_each_possible_cpu(cpu) { | 1116 | data->cpu_data = alloc_percpu(struct fgraph_cpu_data); |
| 1040 | pid_t *pid = &(per_cpu_ptr(data, cpu)->last_pid); | 1117 | if (!data->cpu_data) |
| 1041 | int *depth = &(per_cpu_ptr(data, cpu)->depth); | 1118 | goto out_err_free; |
| 1042 | *pid = -1; | 1119 | |
| 1043 | *depth = 0; | 1120 | for_each_possible_cpu(cpu) { |
| 1044 | } | 1121 | pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid); |
| 1122 | int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); | ||
| 1123 | int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore); | ||
| 1124 | *pid = -1; | ||
| 1125 | *depth = 0; | ||
| 1126 | *ignore = 0; | ||
| 1127 | } | ||
| 1045 | 1128 | ||
| 1046 | iter->private = data; | 1129 | iter->private = data; |
| 1130 | |||
| 1131 | return; | ||
| 1132 | |||
| 1133 | out_err_free: | ||
| 1134 | kfree(data); | ||
| 1135 | out_err: | ||
| 1136 | pr_warning("function graph tracer: not enough memory\n"); | ||
| 1047 | } | 1137 | } |
| 1048 | 1138 | ||
| 1049 | static void graph_trace_close(struct trace_iterator *iter) | 1139 | static void graph_trace_close(struct trace_iterator *iter) |
| 1050 | { | 1140 | { |
| 1051 | free_percpu(iter->private); | 1141 | struct fgraph_data *data = iter->private; |
| 1142 | |||
| 1143 | if (data) { | ||
| 1144 | free_percpu(data->cpu_data); | ||
| 1145 | kfree(data); | ||
| 1146 | } | ||
| 1052 | } | 1147 | } |
| 1053 | 1148 | ||
| 1054 | static struct tracer graph_trace __read_mostly = { | 1149 | static struct tracer graph_trace __read_mostly = { |
| 1055 | .name = "function_graph", | 1150 | .name = "function_graph", |
| 1056 | .open = graph_trace_open, | 1151 | .open = graph_trace_open, |
| 1152 | .pipe_open = graph_trace_open, | ||
| 1057 | .close = graph_trace_close, | 1153 | .close = graph_trace_close, |
| 1154 | .pipe_close = graph_trace_close, | ||
| 1058 | .wait_pipe = poll_wait_pipe, | 1155 | .wait_pipe = poll_wait_pipe, |
| 1059 | .init = graph_trace_init, | 1156 | .init = graph_trace_init, |
| 1060 | .reset = graph_trace_reset, | 1157 | .reset = graph_trace_reset, |
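graph_trace_open() above now performs two allocations (the fgraph_data container plus its per-CPU cpu_data) and unwinds them in reverse order through goto labels on failure, with graph_trace_close() mirroring the teardown. A userspace sketch of that allocate-then-unwind shape using calloc()/free(); the names are illustrative, not the tracer's:

#include <stdio.h>
#include <stdlib.h>

/*
 * Two-step setup with staged error labels: each label frees only what was
 * already allocated, and the close path releases in reverse order.
 */
struct graph_data {
	int *cpu_data;		/* stands in for the per-CPU allocation */
	int ncpus;
};

static struct graph_data *graph_open(int ncpus)
{
	struct graph_data *data;

	data = calloc(1, sizeof(*data));
	if (!data)
		goto out_err;

	data->cpu_data = calloc(ncpus, sizeof(*data->cpu_data));
	if (!data->cpu_data)
		goto out_err_free;

	data->ncpus = ncpus;
	return data;

out_err_free:
	free(data);		/* undo the first allocation only */
out_err:
	fprintf(stderr, "graph tracer sketch: not enough memory\n");
	return NULL;
}

static void graph_close(struct graph_data *data)
{
	if (data) {
		free(data->cpu_data);	/* reverse order of graph_open() */
		free(data);
	}
}

int main(void)
{
	struct graph_data *data = graph_open(4);

	graph_close(data);
	return 0;
}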
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index 69543a905cd5..7b97000745f5 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c | |||
| @@ -20,10 +20,10 @@ | |||
| 20 | 20 | ||
| 21 | #define BTS_BUFFER_SIZE (1 << 13) | 21 | #define BTS_BUFFER_SIZE (1 << 13) |
| 22 | 22 | ||
| 23 | static DEFINE_PER_CPU(struct bts_tracer *, tracer); | 23 | static DEFINE_PER_CPU(struct bts_tracer *, hwb_tracer); |
| 24 | static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer); | 24 | static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], hwb_buffer); |
| 25 | 25 | ||
| 26 | #define this_tracer per_cpu(tracer, smp_processor_id()) | 26 | #define this_tracer per_cpu(hwb_tracer, smp_processor_id()) |
| 27 | 27 | ||
| 28 | static int trace_hw_branches_enabled __read_mostly; | 28 | static int trace_hw_branches_enabled __read_mostly; |
| 29 | static int trace_hw_branches_suspended __read_mostly; | 29 | static int trace_hw_branches_suspended __read_mostly; |
| @@ -32,12 +32,13 @@ static struct trace_array *hw_branch_trace __read_mostly; | |||
| 32 | 32 | ||
| 33 | static void bts_trace_init_cpu(int cpu) | 33 | static void bts_trace_init_cpu(int cpu) |
| 34 | { | 34 | { |
| 35 | per_cpu(tracer, cpu) = | 35 | per_cpu(hwb_tracer, cpu) = |
| 36 | ds_request_bts_cpu(cpu, per_cpu(buffer, cpu), BTS_BUFFER_SIZE, | 36 | ds_request_bts_cpu(cpu, per_cpu(hwb_buffer, cpu), |
| 37 | NULL, (size_t)-1, BTS_KERNEL); | 37 | BTS_BUFFER_SIZE, NULL, (size_t)-1, |
| 38 | BTS_KERNEL); | ||
| 38 | 39 | ||
| 39 | if (IS_ERR(per_cpu(tracer, cpu))) | 40 | if (IS_ERR(per_cpu(hwb_tracer, cpu))) |
| 40 | per_cpu(tracer, cpu) = NULL; | 41 | per_cpu(hwb_tracer, cpu) = NULL; |
| 41 | } | 42 | } |
| 42 | 43 | ||
| 43 | static int bts_trace_init(struct trace_array *tr) | 44 | static int bts_trace_init(struct trace_array *tr) |
| @@ -51,7 +52,7 @@ static int bts_trace_init(struct trace_array *tr) | |||
| 51 | for_each_online_cpu(cpu) { | 52 | for_each_online_cpu(cpu) { |
| 52 | bts_trace_init_cpu(cpu); | 53 | bts_trace_init_cpu(cpu); |
| 53 | 54 | ||
| 54 | if (likely(per_cpu(tracer, cpu))) | 55 | if (likely(per_cpu(hwb_tracer, cpu))) |
| 55 | trace_hw_branches_enabled = 1; | 56 | trace_hw_branches_enabled = 1; |
| 56 | } | 57 | } |
| 57 | trace_hw_branches_suspended = 0; | 58 | trace_hw_branches_suspended = 0; |
| @@ -67,9 +68,9 @@ static void bts_trace_reset(struct trace_array *tr) | |||
| 67 | 68 | ||
| 68 | get_online_cpus(); | 69 | get_online_cpus(); |
| 69 | for_each_online_cpu(cpu) { | 70 | for_each_online_cpu(cpu) { |
| 70 | if (likely(per_cpu(tracer, cpu))) { | 71 | if (likely(per_cpu(hwb_tracer, cpu))) { |
| 71 | ds_release_bts(per_cpu(tracer, cpu)); | 72 | ds_release_bts(per_cpu(hwb_tracer, cpu)); |
| 72 | per_cpu(tracer, cpu) = NULL; | 73 | per_cpu(hwb_tracer, cpu) = NULL; |
| 73 | } | 74 | } |
| 74 | } | 75 | } |
| 75 | trace_hw_branches_enabled = 0; | 76 | trace_hw_branches_enabled = 0; |
| @@ -83,8 +84,8 @@ static void bts_trace_start(struct trace_array *tr) | |||
| 83 | 84 | ||
| 84 | get_online_cpus(); | 85 | get_online_cpus(); |
| 85 | for_each_online_cpu(cpu) | 86 | for_each_online_cpu(cpu) |
| 86 | if (likely(per_cpu(tracer, cpu))) | 87 | if (likely(per_cpu(hwb_tracer, cpu))) |
| 87 | ds_resume_bts(per_cpu(tracer, cpu)); | 88 | ds_resume_bts(per_cpu(hwb_tracer, cpu)); |
| 88 | trace_hw_branches_suspended = 0; | 89 | trace_hw_branches_suspended = 0; |
| 89 | put_online_cpus(); | 90 | put_online_cpus(); |
| 90 | } | 91 | } |
| @@ -95,8 +96,8 @@ static void bts_trace_stop(struct trace_array *tr) | |||
| 95 | 96 | ||
| 96 | get_online_cpus(); | 97 | get_online_cpus(); |
| 97 | for_each_online_cpu(cpu) | 98 | for_each_online_cpu(cpu) |
| 98 | if (likely(per_cpu(tracer, cpu))) | 99 | if (likely(per_cpu(hwb_tracer, cpu))) |
| 99 | ds_suspend_bts(per_cpu(tracer, cpu)); | 100 | ds_suspend_bts(per_cpu(hwb_tracer, cpu)); |
| 100 | trace_hw_branches_suspended = 1; | 101 | trace_hw_branches_suspended = 1; |
| 101 | put_online_cpus(); | 102 | put_online_cpus(); |
| 102 | } | 103 | } |
| @@ -114,16 +115,16 @@ static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb, | |||
| 114 | bts_trace_init_cpu(cpu); | 115 | bts_trace_init_cpu(cpu); |
| 115 | 116 | ||
| 116 | if (trace_hw_branches_suspended && | 117 | if (trace_hw_branches_suspended && |
| 117 | likely(per_cpu(tracer, cpu))) | 118 | likely(per_cpu(hwb_tracer, cpu))) |
| 118 | ds_suspend_bts(per_cpu(tracer, cpu)); | 119 | ds_suspend_bts(per_cpu(hwb_tracer, cpu)); |
| 119 | } | 120 | } |
| 120 | break; | 121 | break; |
| 121 | 122 | ||
| 122 | case CPU_DOWN_PREPARE: | 123 | case CPU_DOWN_PREPARE: |
| 123 | /* The notification is sent with interrupts enabled. */ | 124 | /* The notification is sent with interrupts enabled. */ |
| 124 | if (likely(per_cpu(tracer, cpu))) { | 125 | if (likely(per_cpu(hwb_tracer, cpu))) { |
| 125 | ds_release_bts(per_cpu(tracer, cpu)); | 126 | ds_release_bts(per_cpu(hwb_tracer, cpu)); |
| 126 | per_cpu(tracer, cpu) = NULL; | 127 | per_cpu(hwb_tracer, cpu) = NULL; |
| 127 | } | 128 | } |
| 128 | } | 129 | } |
| 129 | 130 | ||
| @@ -258,8 +259,8 @@ static void trace_bts_prepare(struct trace_iterator *iter) | |||
| 258 | 259 | ||
| 259 | get_online_cpus(); | 260 | get_online_cpus(); |
| 260 | for_each_online_cpu(cpu) | 261 | for_each_online_cpu(cpu) |
| 261 | if (likely(per_cpu(tracer, cpu))) | 262 | if (likely(per_cpu(hwb_tracer, cpu))) |
| 262 | ds_suspend_bts(per_cpu(tracer, cpu)); | 263 | ds_suspend_bts(per_cpu(hwb_tracer, cpu)); |
| 263 | /* | 264 | /* |
| 264 | * We need to collect the trace on the respective cpu since ftrace | 265 | * We need to collect the trace on the respective cpu since ftrace |
| 265 | * implicitly adds the record for the current cpu. | 266 | * implicitly adds the record for the current cpu. |
| @@ -268,8 +269,8 @@ static void trace_bts_prepare(struct trace_iterator *iter) | |||
| 268 | on_each_cpu(trace_bts_cpu, iter->tr, 1); | 269 | on_each_cpu(trace_bts_cpu, iter->tr, 1); |
| 269 | 270 | ||
| 270 | for_each_online_cpu(cpu) | 271 | for_each_online_cpu(cpu) |
| 271 | if (likely(per_cpu(tracer, cpu))) | 272 | if (likely(per_cpu(hwb_tracer, cpu))) |
| 272 | ds_resume_bts(per_cpu(tracer, cpu)); | 273 | ds_resume_bts(per_cpu(hwb_tracer, cpu)); |
| 273 | put_online_cpus(); | 274 | put_online_cpus(); |
| 274 | } | 275 | } |
| 275 | 276 | ||
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index aff5f80b59b8..b52d397e57eb 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
| @@ -606,23 +606,22 @@ static int create_trace_probe(int argc, char **argv) | |||
| 606 | */ | 606 | */ |
| 607 | struct trace_probe *tp; | 607 | struct trace_probe *tp; |
| 608 | int i, ret = 0; | 608 | int i, ret = 0; |
| 609 | int is_return = 0; | 609 | int is_return = 0, is_delete = 0; |
| 610 | char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL; | 610 | char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL; |
| 611 | unsigned long offset = 0; | 611 | unsigned long offset = 0; |
| 612 | void *addr = NULL; | 612 | void *addr = NULL; |
| 613 | char buf[MAX_EVENT_NAME_LEN]; | 613 | char buf[MAX_EVENT_NAME_LEN]; |
| 614 | 614 | ||
| 615 | if (argc < 2) { | 615 | /* argc must be >= 1 */ |
| 616 | pr_info("Probe point is not specified.\n"); | ||
| 617 | return -EINVAL; | ||
| 618 | } | ||
| 619 | |||
| 620 | if (argv[0][0] == 'p') | 616 | if (argv[0][0] == 'p') |
| 621 | is_return = 0; | 617 | is_return = 0; |
| 622 | else if (argv[0][0] == 'r') | 618 | else if (argv[0][0] == 'r') |
| 623 | is_return = 1; | 619 | is_return = 1; |
| 620 | else if (argv[0][0] == '-') | ||
| 621 | is_delete = 1; | ||
| 624 | else { | 622 | else { |
| 625 | pr_info("Probe definition must be started with 'p' or 'r'.\n"); | 623 | pr_info("Probe definition must be started with 'p', 'r' or" |
| 624 | " '-'.\n"); | ||
| 626 | return -EINVAL; | 625 | return -EINVAL; |
| 627 | } | 626 | } |
| 628 | 627 | ||
| @@ -642,7 +641,29 @@ static int create_trace_probe(int argc, char **argv) | |||
| 642 | return -EINVAL; | 641 | return -EINVAL; |
| 643 | } | 642 | } |
| 644 | } | 643 | } |
| 644 | if (!group) | ||
| 645 | group = KPROBE_EVENT_SYSTEM; | ||
| 645 | 646 | ||
| 647 | if (is_delete) { | ||
| 648 | if (!event) { | ||
| 649 | pr_info("Delete command needs an event name.\n"); | ||
| 650 | return -EINVAL; | ||
| 651 | } | ||
| 652 | tp = find_probe_event(event, group); | ||
| 653 | if (!tp) { | ||
| 654 | pr_info("Event %s/%s doesn't exist.\n", group, event); | ||
| 655 | return -ENOENT; | ||
| 656 | } | ||
| 657 | /* delete an event */ | ||
| 658 | unregister_trace_probe(tp); | ||
| 659 | free_trace_probe(tp); | ||
| 660 | return 0; | ||
| 661 | } | ||
| 662 | |||
| 663 | if (argc < 2) { | ||
| 664 | pr_info("Probe point is not specified.\n"); | ||
| 665 | return -EINVAL; | ||
| 666 | } | ||
| 646 | if (isdigit(argv[1][0])) { | 667 | if (isdigit(argv[1][0])) { |
| 647 | if (is_return) { | 668 | if (is_return) { |
| 648 | pr_info("Return probe point must be a symbol.\n"); | 669 | pr_info("Return probe point must be a symbol.\n"); |
| @@ -671,8 +692,6 @@ static int create_trace_probe(int argc, char **argv) | |||
| 671 | argc -= 2; argv += 2; | 692 | argc -= 2; argv += 2; |
| 672 | 693 | ||
| 673 | /* setup a probe */ | 694 | /* setup a probe */ |
| 674 | if (!group) | ||
| 675 | group = KPROBE_EVENT_SYSTEM; | ||
| 676 | if (!event) { | 695 | if (!event) { |
| 677 | /* Make a new event name */ | 696 | /* Make a new event name */ |
| 678 | if (symbol) | 697 | if (symbol) |
| @@ -1114,7 +1133,7 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call) | |||
| 1114 | struct trace_probe *tp = (struct trace_probe *)event_call->data; | 1133 | struct trace_probe *tp = (struct trace_probe *)event_call->data; |
| 1115 | 1134 | ||
| 1116 | ret = trace_define_common_fields(event_call); | 1135 | ret = trace_define_common_fields(event_call); |
| 1117 | if (!ret) | 1136 | if (ret) |
| 1118 | return ret; | 1137 | return ret; |
| 1119 | 1138 | ||
| 1120 | DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); | 1139 | DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); |
| @@ -1132,7 +1151,7 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) | |||
| 1132 | struct trace_probe *tp = (struct trace_probe *)event_call->data; | 1151 | struct trace_probe *tp = (struct trace_probe *)event_call->data; |
| 1133 | 1152 | ||
| 1134 | ret = trace_define_common_fields(event_call); | 1153 | ret = trace_define_common_fields(event_call); |
| 1135 | if (!ret) | 1154 | if (ret) |
| 1136 | return ret; | 1155 | return ret; |
| 1137 | 1156 | ||
| 1138 | DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); | 1157 | DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); |
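The trace_kprobe.c changes above teach create_trace_probe() a third command form: besides 'p' (probe) and 'r' (return probe), a leading '-' now deletes an existing event. A delete needs only an event name and optional group, so the default group assignment moves up and the old up-front "argc < 2" check moves below the delete branch. (The same file also gets its inverted error checks fixed: trace_define_common_fields() returns 0 on success, so the define_fields helpers must bail out when ret is non-zero.) A small, runnable userspace sketch of that dispatch-on-first-character parsing, with hypothetical handlers handle_probe() and handle_delete() standing in for the kernel-side logic:

```c
#include <stdio.h>

/* Hypothetical stand-ins for the kernel-side actions. */
static int handle_probe(int is_return, int argc, char **argv)
{
	printf("define %s probe at %s (%d extra args)\n",
	       is_return ? "return" : "entry", argv[1], argc - 2);
	return 0;
}

static int handle_delete(const char *spec)
{
	const char *name = spec + 1;		/* skip the leading '-' */

	if (*name == ':')
		name++;
	if (!*name) {
		fprintf(stderr, "delete command needs an event name\n");
		return -1;
	}
	printf("delete event %s\n", name);
	return 0;
}

/*
 * Dispatch on the first character of argv[0], the way create_trace_probe()
 * does: 'p' -> probe, 'r' -> return probe, '-' -> delete.  Only the probe
 * forms need a probe point, so the argument-count check happens after the
 * delete branch.
 */
static int create_probe_cmd(int argc, char **argv)
{
	int is_return = 0;

	if (argc < 1)
		return -1;

	switch (argv[0][0]) {
	case 'p':
		is_return = 0;
		break;
	case 'r':
		is_return = 1;
		break;
	case '-':
		return handle_delete(argv[0]);
	default:
		fprintf(stderr, "command must start with 'p', 'r' or '-'\n");
		return -1;
	}

	if (argc < 2) {
		fprintf(stderr, "probe point is not specified\n");
		return -1;
	}
	return handle_probe(is_return, argc, argv);
}

int main(void)
{
	char *add[] = { "p:myprobe", "do_sys_open" };
	char *del[] = { "-:myprobe" };

	create_probe_cmd(2, add);
	create_probe_cmd(1, del);
	return 0;
}
```

Keeping the delete branch ahead of the argument-count check is what lets a bare "-:event" line work with a single argument.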
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index ddfa0fd43bc0..acb87d4a4ac1 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c | |||
| @@ -79,11 +79,12 @@ void ksym_collect_stats(unsigned long hbp_hit_addr) | |||
| 79 | } | 79 | } |
| 80 | #endif /* CONFIG_PROFILE_KSYM_TRACER */ | 80 | #endif /* CONFIG_PROFILE_KSYM_TRACER */ |
| 81 | 81 | ||
| 82 | void ksym_hbp_handler(struct perf_event *hbp, void *data) | 82 | void ksym_hbp_handler(struct perf_event *hbp, int nmi, |
| 83 | struct perf_sample_data *data, | ||
| 84 | struct pt_regs *regs) | ||
| 83 | { | 85 | { |
| 84 | struct ring_buffer_event *event; | 86 | struct ring_buffer_event *event; |
| 85 | struct ksym_trace_entry *entry; | 87 | struct ksym_trace_entry *entry; |
| 86 | struct pt_regs *regs = data; | ||
| 87 | struct ring_buffer *buffer; | 88 | struct ring_buffer *buffer; |
| 88 | int pc; | 89 | int pc; |
| 89 | 90 | ||
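The trace_ksym.c hunk is purely a signature adaptation: ksym_hbp_handler() now matches the newer perf overflow-handler prototype, so the registers arrive as a typed struct pt_regs * argument (together with the NMI flag and the perf_sample_data) instead of being recovered from an untyped void *data pointer. A bare skeleton of a callback with that signature, copied from the hunk above; this is a kernel-side fragment only, with registration and the ring-buffer recording omitted:

```c
#include <linux/perf_event.h>
#include <linux/ptrace.h>

/*
 * Fragment, not a standalone program: the parameters mirror the prototype
 * ksym_hbp_handler() is converted to above.  The old form received the
 * registers through "void *data" and had to cast.
 */
static void example_hbp_handler(struct perf_event *hbp, int nmi,
				struct perf_sample_data *data,
				struct pt_regs *regs)
{
	/* e.g. record instruction_pointer(regs) into the trace ring buffer */
}
```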
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index b6c12c6a1bcd..8e46b3323cdc 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c | |||
| @@ -23,13 +23,21 @@ static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly; | |||
| 23 | 23 | ||
| 24 | static int next_event_type = __TRACE_LAST_TYPE + 1; | 24 | static int next_event_type = __TRACE_LAST_TYPE + 1; |
| 25 | 25 | ||
| 26 | void trace_print_seq(struct seq_file *m, struct trace_seq *s) | 26 | int trace_print_seq(struct seq_file *m, struct trace_seq *s) |
| 27 | { | 27 | { |
| 28 | int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; | 28 | int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; |
| 29 | int ret; | ||
| 30 | |||
| 31 | ret = seq_write(m, s->buffer, len); | ||
| 29 | 32 | ||
| 30 | seq_write(m, s->buffer, len); | 33 | /* |
| 34 | * Only reset this buffer if we successfully wrote to the | ||
| 35 | * seq_file buffer. | ||
| 36 | */ | ||
| 37 | if (!ret) | ||
| 38 | trace_seq_init(s); | ||
| 31 | 39 | ||
| 32 | trace_seq_init(s); | 40 | return ret; |
| 33 | } | 41 | } |
| 34 | 42 | ||
| 35 | enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) | 43 | enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) |
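With the change above, trace_print_seq() returns the result of seq_write() and only re-initializes the trace_seq when that copy succeeded, so output that did not fit into the seq_file buffer is kept and can be retried on the next read instead of being silently dropped. A runnable userspace sketch of the same "flush, but only clear on success" idea, with a hypothetical flush_to() callback standing in for seq_write():

```c
#include <stdio.h>
#include <string.h>

struct staging_buf {
	char data[64];
	size_t len;
};

/*
 * Discard the staged bytes only if the downstream write accepted them;
 * otherwise leave them in place so a later flush can retry (mirrors
 * trace_print_seq() resetting the trace_seq only when seq_write() returned 0).
 */
static int flush_staging(struct staging_buf *s,
			 int (*flush_to)(const char *buf, size_t len))
{
	int ret = flush_to(s->data, s->len);

	if (!ret)
		s->len = 0;	/* success: start the next batch from scratch */
	return ret;		/* failure: keep the data for the next attempt */
}

static int ok_sink(const char *buf, size_t len)
{
	return fwrite(buf, 1, len, stdout) == len ? 0 : -1;
}

static int full_sink(const char *buf, size_t len)
{
	(void)buf; (void)len;
	return -1;		/* pretend the destination buffer overflowed */
}

int main(void)
{
	struct staging_buf s = { .len = 0 };

	s.len = (size_t)snprintf(s.data, sizeof(s.data), "hello trace\n");

	flush_staging(&s, full_sink);	/* fails: s.len is preserved */
	flush_staging(&s, ok_sink);	/* succeeds: data written, s.len reset */
	return 0;
}
```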
| @@ -85,7 +93,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) | |||
| 85 | va_list ap; | 93 | va_list ap; |
| 86 | int ret; | 94 | int ret; |
| 87 | 95 | ||
| 88 | if (!len) | 96 | if (s->full || !len) |
| 89 | return 0; | 97 | return 0; |
| 90 | 98 | ||
| 91 | va_start(ap, fmt); | 99 | va_start(ap, fmt); |
| @@ -93,8 +101,10 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) | |||
| 93 | va_end(ap); | 101 | va_end(ap); |
| 94 | 102 | ||
| 95 | /* If we can't write it all, don't bother writing anything */ | 103 | /* If we can't write it all, don't bother writing anything */ |
| 96 | if (ret >= len) | 104 | if (ret >= len) { |
| 105 | s->full = 1; | ||
| 97 | return 0; | 106 | return 0; |
| 107 | } | ||
| 98 | 108 | ||
| 99 | s->len += ret; | 109 | s->len += ret; |
| 100 | 110 | ||
| @@ -119,14 +129,16 @@ trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) | |||
| 119 | int len = (PAGE_SIZE - 1) - s->len; | 129 | int len = (PAGE_SIZE - 1) - s->len; |
| 120 | int ret; | 130 | int ret; |
| 121 | 131 | ||
| 122 | if (!len) | 132 | if (s->full || !len) |
| 123 | return 0; | 133 | return 0; |
| 124 | 134 | ||
| 125 | ret = vsnprintf(s->buffer + s->len, len, fmt, args); | 135 | ret = vsnprintf(s->buffer + s->len, len, fmt, args); |
| 126 | 136 | ||
| 127 | /* If we can't write it all, don't bother writing anything */ | 137 | /* If we can't write it all, don't bother writing anything */ |
| 128 | if (ret >= len) | 138 | if (ret >= len) { |
| 139 | s->full = 1; | ||
| 129 | return 0; | 140 | return 0; |
| 141 | } | ||
| 130 | 142 | ||
| 131 | s->len += ret; | 143 | s->len += ret; |
| 132 | 144 | ||
| @@ -139,14 +151,16 @@ int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) | |||
| 139 | int len = (PAGE_SIZE - 1) - s->len; | 151 | int len = (PAGE_SIZE - 1) - s->len; |
| 140 | int ret; | 152 | int ret; |
| 141 | 153 | ||
| 142 | if (!len) | 154 | if (s->full || !len) |
| 143 | return 0; | 155 | return 0; |
| 144 | 156 | ||
| 145 | ret = bstr_printf(s->buffer + s->len, len, fmt, binary); | 157 | ret = bstr_printf(s->buffer + s->len, len, fmt, binary); |
| 146 | 158 | ||
| 147 | /* If we can't write it all, don't bother writing anything */ | 159 | /* If we can't write it all, don't bother writing anything */ |
| 148 | if (ret >= len) | 160 | if (ret >= len) { |
| 161 | s->full = 1; | ||
| 149 | return 0; | 162 | return 0; |
| 163 | } | ||
| 150 | 164 | ||
| 151 | s->len += ret; | 165 | s->len += ret; |
| 152 | 166 | ||
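The trace_seq_printf()/trace_seq_vprintf()/trace_seq_bprintf() hunks above, and the puts/putc/putmem/reserve/path hunks that follow, add a "full" latch to the trace_seq: the first output call that would not fit in the page-sized buffer sets s->full, and every later call bails out immediately instead of appending a partial record near the end of the page. A runnable userspace analogue of that all-or-nothing append with a latched full flag (the seq_buf type and seq_buf_printf name here are illustrative):

```c
#include <stdarg.h>
#include <stdio.h>
#include <string.h>

#define SEQ_SIZE 32		/* stand-in for the one-page trace_seq buffer */

struct seq_buf {
	char buffer[SEQ_SIZE];
	size_t len;
	int full;		/* latched on the first write that does not fit */
};

/*
 * All-or-nothing append: if the formatted text cannot be written completely,
 * count none of it and latch ->full so later calls return early, like
 * trace_seq_printf() after this change.
 */
static int seq_buf_printf(struct seq_buf *s, const char *fmt, ...)
{
	size_t len = (SEQ_SIZE - 1) - s->len;
	va_list ap;
	int ret;

	if (s->full || !len)
		return 0;

	va_start(ap, fmt);
	ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
	va_end(ap);

	if (ret < 0 || (size_t)ret >= len) {
		s->full = 1;			/* latch: later calls bail out early */
		s->buffer[s->len] = '\0';	/* ignore the truncated tail */
		return 0;
	}
	s->len += (size_t)ret;
	return 1;
}

int main(void)
{
	struct seq_buf s = { .len = 0, .full = 0 };

	seq_buf_printf(&s, "pid=%d ", 1234);
	seq_buf_printf(&s, "comm=%s ", "a-rather-long-task-name");	/* overflows: latched */
	seq_buf_printf(&s, "cpu=%d", 3);	/* no-op once s.full is set */

	printf("len=%zu full=%d buf=\"%s\"\n", s.len, s.full, s.buffer);
	return 0;
}
```

Latching the flag means one failed append suppresses the rest of the record, which appears to be the intent here: a half-written event line is worse than a cleanly dropped one.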
| @@ -167,8 +181,13 @@ int trace_seq_puts(struct trace_seq *s, const char *str) | |||
| 167 | { | 181 | { |
| 168 | int len = strlen(str); | 182 | int len = strlen(str); |
| 169 | 183 | ||
| 170 | if (len > ((PAGE_SIZE - 1) - s->len)) | 184 | if (s->full) |
| 185 | return 0; | ||
| 186 | |||
| 187 | if (len > ((PAGE_SIZE - 1) - s->len)) { | ||
| 188 | s->full = 1; | ||
| 171 | return 0; | 189 | return 0; |
| 190 | } | ||
| 172 | 191 | ||
| 173 | memcpy(s->buffer + s->len, str, len); | 192 | memcpy(s->buffer + s->len, str, len); |
| 174 | s->len += len; | 193 | s->len += len; |
| @@ -178,9 +197,14 @@ int trace_seq_puts(struct trace_seq *s, const char *str) | |||
| 178 | 197 | ||
| 179 | int trace_seq_putc(struct trace_seq *s, unsigned char c) | 198 | int trace_seq_putc(struct trace_seq *s, unsigned char c) |
| 180 | { | 199 | { |
| 181 | if (s->len >= (PAGE_SIZE - 1)) | 200 | if (s->full) |
| 182 | return 0; | 201 | return 0; |
| 183 | 202 | ||
| 203 | if (s->len >= (PAGE_SIZE - 1)) { | ||
| 204 | s->full = 1; | ||
| 205 | return 0; | ||
| 206 | } | ||
| 207 | |||
| 184 | s->buffer[s->len++] = c; | 208 | s->buffer[s->len++] = c; |
| 185 | 209 | ||
| 186 | return 1; | 210 | return 1; |
| @@ -188,9 +212,14 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c) | |||
| 188 | 212 | ||
| 189 | int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) | 213 | int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) |
| 190 | { | 214 | { |
| 191 | if (len > ((PAGE_SIZE - 1) - s->len)) | 215 | if (s->full) |
| 192 | return 0; | 216 | return 0; |
| 193 | 217 | ||
| 218 | if (len > ((PAGE_SIZE - 1) - s->len)) { | ||
| 219 | s->full = 1; | ||
| 220 | return 0; | ||
| 221 | } | ||
| 222 | |||
| 194 | memcpy(s->buffer + s->len, mem, len); | 223 | memcpy(s->buffer + s->len, mem, len); |
| 195 | s->len += len; | 224 | s->len += len; |
| 196 | 225 | ||
| @@ -203,6 +232,9 @@ int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len) | |||
| 203 | const unsigned char *data = mem; | 232 | const unsigned char *data = mem; |
| 204 | int i, j; | 233 | int i, j; |
| 205 | 234 | ||
| 235 | if (s->full) | ||
| 236 | return 0; | ||
| 237 | |||
| 206 | #ifdef __BIG_ENDIAN | 238 | #ifdef __BIG_ENDIAN |
| 207 | for (i = 0, j = 0; i < len; i++) { | 239 | for (i = 0, j = 0; i < len; i++) { |
| 208 | #else | 240 | #else |
| @@ -220,8 +252,13 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len) | |||
| 220 | { | 252 | { |
| 221 | void *ret; | 253 | void *ret; |
| 222 | 254 | ||
| 223 | if (len > ((PAGE_SIZE - 1) - s->len)) | 255 | if (s->full) |
| 256 | return 0; | ||
| 257 | |||
| 258 | if (len > ((PAGE_SIZE - 1) - s->len)) { | ||
| 259 | s->full = 1; | ||
| 224 | return NULL; | 260 | return NULL; |
| 261 | } | ||
| 225 | 262 | ||
| 226 | ret = s->buffer + s->len; | 263 | ret = s->buffer + s->len; |
| 227 | s->len += len; | 264 | s->len += len; |
| @@ -233,8 +270,14 @@ int trace_seq_path(struct trace_seq *s, struct path *path) | |||
| 233 | { | 270 | { |
| 234 | unsigned char *p; | 271 | unsigned char *p; |
| 235 | 272 | ||
| 236 | if (s->len >= (PAGE_SIZE - 1)) | 273 | if (s->full) |
| 274 | return 0; | ||
| 275 | |||
| 276 | if (s->len >= (PAGE_SIZE - 1)) { | ||
| 277 | s->full = 1; | ||
| 237 | return 0; | 278 | return 0; |
| 279 | } | ||
| 280 | |||
| 238 | p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); | 281 | p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); |
| 239 | if (!IS_ERR(p)) { | 282 | if (!IS_ERR(p)) { |
| 240 | p = mangle_path(s->buffer + s->len, p, "\n"); | 283 | p = mangle_path(s->buffer + s->len, p, "\n"); |
| @@ -247,6 +290,7 @@ int trace_seq_path(struct trace_seq *s, struct path *path) | |||
| 247 | return 1; | 290 | return 1; |
| 248 | } | 291 | } |
| 249 | 292 | ||
| 293 | s->full = 1; | ||
| 250 | return 0; | 294 | return 0; |
| 251 | } | 295 | } |
| 252 | 296 | ||
| @@ -373,6 +417,9 @@ int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, | |||
| 373 | unsigned long vmstart = 0; | 417 | unsigned long vmstart = 0; |
| 374 | int ret = 1; | 418 | int ret = 1; |
| 375 | 419 | ||
| 420 | if (s->full) | ||
| 421 | return 0; | ||
| 422 | |||
| 376 | if (mm) { | 423 | if (mm) { |
| 377 | const struct vm_area_struct *vma; | 424 | const struct vm_area_struct *vma; |
| 378 | 425 | ||
