diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cpu.c | 18 | ||||
-rw-r--r-- | kernel/cpuset.c | 18 | ||||
-rw-r--r-- | kernel/exit.c | 2 | ||||
-rw-r--r-- | kernel/futex.c | 10 | ||||
-rw-r--r-- | kernel/hrtimer.c | 121 | ||||
-rw-r--r-- | kernel/hw_breakpoint.c | 146 | ||||
-rw-r--r-- | kernel/irq/spurious.c | 2 | ||||
-rw-r--r-- | kernel/kgdb.c | 56 | ||||
-rw-r--r-- | kernel/lockdep.c | 16 | ||||
-rw-r--r-- | kernel/perf_event.c | 79 | ||||
-rw-r--r-- | kernel/pm_qos_params.c | 20 | ||||
-rw-r--r-- | kernel/resource.c | 26 | ||||
-rw-r--r-- | kernel/sched.c | 218 | ||||
-rw-r--r-- | kernel/sched_debug.c | 13 | ||||
-rw-r--r-- | kernel/sched_fair.c | 155 | ||||
-rw-r--r-- | kernel/sched_features.h | 5 | ||||
-rw-r--r-- | kernel/sched_idletask.c | 2 | ||||
-rw-r--r-- | kernel/sched_rt.c | 2 | ||||
-rw-r--r-- | kernel/sys.c | 14 | ||||
-rw-r--r-- | kernel/sysctl.c | 30 | ||||
-rw-r--r-- | kernel/time/clocksource.c | 2 | ||||
-rw-r--r-- | kernel/time/timer_list.c | 5 | ||||
-rw-r--r-- | kernel/trace/trace_kprobe.c | 37 | ||||
-rw-r--r-- | kernel/trace/trace_ksym.c | 5 | ||||
-rw-r--r-- | kernel/workqueue.c | 131 |
25 files changed, 716 insertions, 417 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c index 7c4e2713df0a..291ac586f37f 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -212,6 +212,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) | |||
212 | err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod, | 212 | err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod, |
213 | hcpu, -1, &nr_calls); | 213 | hcpu, -1, &nr_calls); |
214 | if (err == NOTIFY_BAD) { | 214 | if (err == NOTIFY_BAD) { |
215 | set_cpu_active(cpu, true); | ||
216 | |||
215 | nr_calls--; | 217 | nr_calls--; |
216 | __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, | 218 | __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, |
217 | hcpu, nr_calls, NULL); | 219 | hcpu, nr_calls, NULL); |
@@ -223,11 +225,11 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) | |||
223 | 225 | ||
224 | /* Ensure that we are not runnable on dying cpu */ | 226 | /* Ensure that we are not runnable on dying cpu */ |
225 | cpumask_copy(old_allowed, &current->cpus_allowed); | 227 | cpumask_copy(old_allowed, &current->cpus_allowed); |
226 | set_cpus_allowed_ptr(current, | 228 | set_cpus_allowed_ptr(current, cpu_active_mask); |
227 | cpumask_of(cpumask_any_but(cpu_online_mask, cpu))); | ||
228 | 229 | ||
229 | err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); | 230 | err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); |
230 | if (err) { | 231 | if (err) { |
232 | set_cpu_active(cpu, true); | ||
231 | /* CPU didn't die: tell everyone. Can't complain. */ | 233 | /* CPU didn't die: tell everyone. Can't complain. */ |
232 | if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, | 234 | if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, |
233 | hcpu) == NOTIFY_BAD) | 235 | hcpu) == NOTIFY_BAD) |
@@ -292,9 +294,6 @@ int __ref cpu_down(unsigned int cpu) | |||
292 | 294 | ||
293 | err = _cpu_down(cpu, 0); | 295 | err = _cpu_down(cpu, 0); |
294 | 296 | ||
295 | if (cpu_online(cpu)) | ||
296 | set_cpu_active(cpu, true); | ||
297 | |||
298 | out: | 297 | out: |
299 | cpu_maps_update_done(); | 298 | cpu_maps_update_done(); |
300 | stop_machine_destroy(); | 299 | stop_machine_destroy(); |
@@ -387,6 +386,15 @@ int disable_nonboot_cpus(void) | |||
387 | * with the userspace trying to use the CPU hotplug at the same time | 386 | * with the userspace trying to use the CPU hotplug at the same time |
388 | */ | 387 | */ |
389 | cpumask_clear(frozen_cpus); | 388 | cpumask_clear(frozen_cpus); |
389 | |||
390 | for_each_online_cpu(cpu) { | ||
391 | if (cpu == first_cpu) | ||
392 | continue; | ||
393 | set_cpu_active(cpu, false); | ||
394 | } | ||
395 | |||
396 | synchronize_sched(); | ||
397 | |||
390 | printk("Disabling non-boot CPUs ...\n"); | 398 | printk("Disabling non-boot CPUs ...\n"); |
391 | for_each_online_cpu(cpu) { | 399 | for_each_online_cpu(cpu) { |
392 | if (cpu == first_cpu) | 400 | if (cpu == first_cpu) |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 3cf2183b472d..ba401fab459f 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -737,7 +737,7 @@ static void do_rebuild_sched_domains(struct work_struct *unused) | |||
737 | { | 737 | { |
738 | } | 738 | } |
739 | 739 | ||
740 | static int generate_sched_domains(struct cpumask **domains, | 740 | static int generate_sched_domains(cpumask_var_t **domains, |
741 | struct sched_domain_attr **attributes) | 741 | struct sched_domain_attr **attributes) |
742 | { | 742 | { |
743 | *domains = NULL; | 743 | *domains = NULL; |
@@ -872,7 +872,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, | |||
872 | if (retval < 0) | 872 | if (retval < 0) |
873 | return retval; | 873 | return retval; |
874 | 874 | ||
875 | if (!cpumask_subset(trialcs->cpus_allowed, cpu_online_mask)) | 875 | if (!cpumask_subset(trialcs->cpus_allowed, cpu_active_mask)) |
876 | return -EINVAL; | 876 | return -EINVAL; |
877 | } | 877 | } |
878 | retval = validate_change(cs, trialcs); | 878 | retval = validate_change(cs, trialcs); |
@@ -2010,7 +2010,7 @@ static void scan_for_empty_cpusets(struct cpuset *root) | |||
2010 | } | 2010 | } |
2011 | 2011 | ||
2012 | /* Continue past cpusets with all cpus, mems online */ | 2012 | /* Continue past cpusets with all cpus, mems online */ |
2013 | if (cpumask_subset(cp->cpus_allowed, cpu_online_mask) && | 2013 | if (cpumask_subset(cp->cpus_allowed, cpu_active_mask) && |
2014 | nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) | 2014 | nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) |
2015 | continue; | 2015 | continue; |
2016 | 2016 | ||
@@ -2019,7 +2019,7 @@ static void scan_for_empty_cpusets(struct cpuset *root) | |||
2019 | /* Remove offline cpus and mems from this cpuset. */ | 2019 | /* Remove offline cpus and mems from this cpuset. */ |
2020 | mutex_lock(&callback_mutex); | 2020 | mutex_lock(&callback_mutex); |
2021 | cpumask_and(cp->cpus_allowed, cp->cpus_allowed, | 2021 | cpumask_and(cp->cpus_allowed, cp->cpus_allowed, |
2022 | cpu_online_mask); | 2022 | cpu_active_mask); |
2023 | nodes_and(cp->mems_allowed, cp->mems_allowed, | 2023 | nodes_and(cp->mems_allowed, cp->mems_allowed, |
2024 | node_states[N_HIGH_MEMORY]); | 2024 | node_states[N_HIGH_MEMORY]); |
2025 | mutex_unlock(&callback_mutex); | 2025 | mutex_unlock(&callback_mutex); |
@@ -2057,8 +2057,10 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb, | |||
2057 | switch (phase) { | 2057 | switch (phase) { |
2058 | case CPU_ONLINE: | 2058 | case CPU_ONLINE: |
2059 | case CPU_ONLINE_FROZEN: | 2059 | case CPU_ONLINE_FROZEN: |
2060 | case CPU_DEAD: | 2060 | case CPU_DOWN_PREPARE: |
2061 | case CPU_DEAD_FROZEN: | 2061 | case CPU_DOWN_PREPARE_FROZEN: |
2062 | case CPU_DOWN_FAILED: | ||
2063 | case CPU_DOWN_FAILED_FROZEN: | ||
2062 | break; | 2064 | break; |
2063 | 2065 | ||
2064 | default: | 2066 | default: |
@@ -2067,7 +2069,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb, | |||
2067 | 2069 | ||
2068 | cgroup_lock(); | 2070 | cgroup_lock(); |
2069 | mutex_lock(&callback_mutex); | 2071 | mutex_lock(&callback_mutex); |
2070 | cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask); | 2072 | cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); |
2071 | mutex_unlock(&callback_mutex); | 2073 | mutex_unlock(&callback_mutex); |
2072 | scan_for_empty_cpusets(&top_cpuset); | 2074 | scan_for_empty_cpusets(&top_cpuset); |
2073 | ndoms = generate_sched_domains(&doms, &attr); | 2075 | ndoms = generate_sched_domains(&doms, &attr); |
@@ -2114,7 +2116,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self, | |||
2114 | 2116 | ||
2115 | void __init cpuset_init_smp(void) | 2117 | void __init cpuset_init_smp(void) |
2116 | { | 2118 | { |
2117 | cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask); | 2119 | cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); |
2118 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; | 2120 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; |
2119 | 2121 | ||
2120 | hotcpu_notifier(cpuset_track_online_cpus, 0); | 2122 | hotcpu_notifier(cpuset_track_online_cpus, 0); |
diff --git a/kernel/exit.c b/kernel/exit.c index 1143012951e9..6f50ef55a6f3 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -971,7 +971,7 @@ NORET_TYPE void do_exit(long code) | |||
971 | exit_thread(); | 971 | exit_thread(); |
972 | cgroup_exit(tsk, 1); | 972 | cgroup_exit(tsk, 1); |
973 | 973 | ||
974 | if (group_dead && tsk->signal->leader) | 974 | if (group_dead) |
975 | disassociate_ctty(1); | 975 | disassociate_ctty(1); |
976 | 976 | ||
977 | module_put(task_thread_info(tsk)->exec_domain->module); | 977 | module_put(task_thread_info(tsk)->exec_domain->module); |
diff --git a/kernel/futex.c b/kernel/futex.c index fb65e822fc41..d73ef1f3e55d 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -304,8 +304,14 @@ void put_futex_key(int fshared, union futex_key *key) | |||
304 | */ | 304 | */ |
305 | static int fault_in_user_writeable(u32 __user *uaddr) | 305 | static int fault_in_user_writeable(u32 __user *uaddr) |
306 | { | 306 | { |
307 | int ret = get_user_pages(current, current->mm, (unsigned long)uaddr, | 307 | struct mm_struct *mm = current->mm; |
308 | 1, 1, 0, NULL, NULL); | 308 | int ret; |
309 | |||
310 | down_read(&mm->mmap_sem); | ||
311 | ret = get_user_pages(current, mm, (unsigned long)uaddr, | ||
312 | 1, 1, 0, NULL, NULL); | ||
313 | up_read(&mm->mmap_sem); | ||
314 | |||
309 | return ret < 0 ? ret : 0; | 315 | return ret < 0 ? ret : 0; |
310 | } | 316 | } |
311 | 317 | ||
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index ede527708123..d2f9239dc6ba 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -557,7 +557,7 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) | |||
557 | static int hrtimer_reprogram(struct hrtimer *timer, | 557 | static int hrtimer_reprogram(struct hrtimer *timer, |
558 | struct hrtimer_clock_base *base) | 558 | struct hrtimer_clock_base *base) |
559 | { | 559 | { |
560 | ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next; | 560 | struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); |
561 | ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); | 561 | ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); |
562 | int res; | 562 | int res; |
563 | 563 | ||
@@ -582,7 +582,16 @@ static int hrtimer_reprogram(struct hrtimer *timer, | |||
582 | if (expires.tv64 < 0) | 582 | if (expires.tv64 < 0) |
583 | return -ETIME; | 583 | return -ETIME; |
584 | 584 | ||
585 | if (expires.tv64 >= expires_next->tv64) | 585 | if (expires.tv64 >= cpu_base->expires_next.tv64) |
586 | return 0; | ||
587 | |||
588 | /* | ||
589 | * If a hang was detected in the last timer interrupt then we | ||
590 | * do not schedule a timer which is earlier than the expiry | ||
591 | * which we enforced in the hang detection. We want the system | ||
592 | * to make progress. | ||
593 | */ | ||
594 | if (cpu_base->hang_detected) | ||
586 | return 0; | 595 | return 0; |
587 | 596 | ||
588 | /* | 597 | /* |
@@ -590,7 +599,7 @@ static int hrtimer_reprogram(struct hrtimer *timer, | |||
590 | */ | 599 | */ |
591 | res = tick_program_event(expires, 0); | 600 | res = tick_program_event(expires, 0); |
592 | if (!IS_ERR_VALUE(res)) | 601 | if (!IS_ERR_VALUE(res)) |
593 | *expires_next = expires; | 602 | cpu_base->expires_next = expires; |
594 | return res; | 603 | return res; |
595 | } | 604 | } |
596 | 605 | ||
@@ -747,17 +756,33 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { } | |||
747 | 756 | ||
748 | #endif /* CONFIG_HIGH_RES_TIMERS */ | 757 | #endif /* CONFIG_HIGH_RES_TIMERS */ |
749 | 758 | ||
750 | #ifdef CONFIG_TIMER_STATS | 759 | static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) |
751 | void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr) | ||
752 | { | 760 | { |
761 | #ifdef CONFIG_TIMER_STATS | ||
753 | if (timer->start_site) | 762 | if (timer->start_site) |
754 | return; | 763 | return; |
755 | 764 | timer->start_site = __builtin_return_address(0); | |
756 | timer->start_site = addr; | ||
757 | memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); | 765 | memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); |
758 | timer->start_pid = current->pid; | 766 | timer->start_pid = current->pid; |
767 | #endif | ||
759 | } | 768 | } |
769 | |||
770 | static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer) | ||
771 | { | ||
772 | #ifdef CONFIG_TIMER_STATS | ||
773 | timer->start_site = NULL; | ||
774 | #endif | ||
775 | } | ||
776 | |||
777 | static inline void timer_stats_account_hrtimer(struct hrtimer *timer) | ||
778 | { | ||
779 | #ifdef CONFIG_TIMER_STATS | ||
780 | if (likely(!timer_stats_active)) | ||
781 | return; | ||
782 | timer_stats_update_stats(timer, timer->start_pid, timer->start_site, | ||
783 | timer->function, timer->start_comm, 0); | ||
760 | #endif | 784 | #endif |
785 | } | ||
761 | 786 | ||
762 | /* | 787 | /* |
763 | * Counterpart to lock_hrtimer_base above: | 788 | * Counterpart to lock_hrtimer_base above: |
@@ -1217,30 +1242,6 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) | |||
1217 | 1242 | ||
1218 | #ifdef CONFIG_HIGH_RES_TIMERS | 1243 | #ifdef CONFIG_HIGH_RES_TIMERS |
1219 | 1244 | ||
1220 | static int force_clock_reprogram; | ||
1221 | |||
1222 | /* | ||
1223 | * After 5 iteration's attempts, we consider that hrtimer_interrupt() | ||
1224 | * is hanging, which could happen with something that slows the interrupt | ||
1225 | * such as the tracing. Then we force the clock reprogramming for each future | ||
1226 | * hrtimer interrupts to avoid infinite loops and use the min_delta_ns | ||
1227 | * threshold that we will overwrite. | ||
1228 | * The next tick event will be scheduled to 3 times we currently spend on | ||
1229 | * hrtimer_interrupt(). This gives a good compromise, the cpus will spend | ||
1230 | * 1/4 of their time to process the hrtimer interrupts. This is enough to | ||
1231 | * let it running without serious starvation. | ||
1232 | */ | ||
1233 | |||
1234 | static inline void | ||
1235 | hrtimer_interrupt_hanging(struct clock_event_device *dev, | ||
1236 | ktime_t try_time) | ||
1237 | { | ||
1238 | force_clock_reprogram = 1; | ||
1239 | dev->min_delta_ns = (unsigned long)try_time.tv64 * 3; | ||
1240 | printk(KERN_WARNING "hrtimer: interrupt too slow, " | ||
1241 | "forcing clock min delta to %llu ns\n", | ||
1242 | (unsigned long long) dev->min_delta_ns); | ||
1243 | } | ||
1244 | /* | 1245 | /* |
1245 | * High resolution timer interrupt | 1246 | * High resolution timer interrupt |
1246 | * Called with interrupts disabled | 1247 | * Called with interrupts disabled |
@@ -1249,21 +1250,15 @@ void hrtimer_interrupt(struct clock_event_device *dev) | |||
1249 | { | 1250 | { |
1250 | struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); | 1251 | struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); |
1251 | struct hrtimer_clock_base *base; | 1252 | struct hrtimer_clock_base *base; |
1252 | ktime_t expires_next, now; | 1253 | ktime_t expires_next, now, entry_time, delta; |
1253 | int nr_retries = 0; | 1254 | int i, retries = 0; |
1254 | int i; | ||
1255 | 1255 | ||
1256 | BUG_ON(!cpu_base->hres_active); | 1256 | BUG_ON(!cpu_base->hres_active); |
1257 | cpu_base->nr_events++; | 1257 | cpu_base->nr_events++; |
1258 | dev->next_event.tv64 = KTIME_MAX; | 1258 | dev->next_event.tv64 = KTIME_MAX; |
1259 | 1259 | ||
1260 | retry: | 1260 | entry_time = now = ktime_get(); |
1261 | /* 5 retries is enough to notice a hang */ | 1261 | retry: |
1262 | if (!(++nr_retries % 5)) | ||
1263 | hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now)); | ||
1264 | |||
1265 | now = ktime_get(); | ||
1266 | |||
1267 | expires_next.tv64 = KTIME_MAX; | 1262 | expires_next.tv64 = KTIME_MAX; |
1268 | 1263 | ||
1269 | spin_lock(&cpu_base->lock); | 1264 | spin_lock(&cpu_base->lock); |
@@ -1325,10 +1320,48 @@ void hrtimer_interrupt(struct clock_event_device *dev) | |||
1325 | spin_unlock(&cpu_base->lock); | 1320 | spin_unlock(&cpu_base->lock); |
1326 | 1321 | ||
1327 | /* Reprogramming necessary ? */ | 1322 | /* Reprogramming necessary ? */ |
1328 | if (expires_next.tv64 != KTIME_MAX) { | 1323 | if (expires_next.tv64 == KTIME_MAX || |
1329 | if (tick_program_event(expires_next, force_clock_reprogram)) | 1324 | !tick_program_event(expires_next, 0)) { |
1330 | goto retry; | 1325 | cpu_base->hang_detected = 0; |
1326 | return; | ||
1331 | } | 1327 | } |
1328 | |||
1329 | /* | ||
1330 | * The next timer was already expired due to: | ||
1331 | * - tracing | ||
1332 | * - long lasting callbacks | ||
1333 | * - being scheduled away when running in a VM | ||
1334 | * | ||
1335 | * We need to prevent that we loop forever in the hrtimer | ||
1336 | * interrupt routine. We give it 3 attempts to avoid | ||
1337 | * overreacting on some spurious event. | ||
1338 | */ | ||
1339 | now = ktime_get(); | ||
1340 | cpu_base->nr_retries++; | ||
1341 | if (++retries < 3) | ||
1342 | goto retry; | ||
1343 | /* | ||
1344 | * Give the system a chance to do something else than looping | ||
1345 | * here. We stored the entry time, so we know exactly how long | ||
1346 | * we spent here. We schedule the next event this amount of | ||
1347 | * time away. | ||
1348 | */ | ||
1349 | cpu_base->nr_hangs++; | ||
1350 | cpu_base->hang_detected = 1; | ||
1351 | delta = ktime_sub(now, entry_time); | ||
1352 | if (delta.tv64 > cpu_base->max_hang_time.tv64) | ||
1353 | cpu_base->max_hang_time = delta; | ||
1354 | /* | ||
1355 | * Limit it to a sensible value as we enforce a longer | ||
1356 | * delay. Give the CPU at least 100ms to catch up. | ||
1357 | */ | ||
1358 | if (delta.tv64 > 100 * NSEC_PER_MSEC) | ||
1359 | expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC); | ||
1360 | else | ||
1361 | expires_next = ktime_add(now, delta); | ||
1362 | tick_program_event(expires_next, 1); | ||
1363 | printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n", | ||
1364 | ktime_to_ns(delta)); | ||
1332 | } | 1365 | } |
1333 | 1366 | ||
1334 | /* | 1367 | /* |
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index cf5ee1628411..366eedf949c0 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c | |||
@@ -52,7 +52,7 @@ | |||
52 | static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned); | 52 | static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned); |
53 | 53 | ||
54 | /* Number of pinned task breakpoints in a cpu */ | 54 | /* Number of pinned task breakpoints in a cpu */ |
55 | static DEFINE_PER_CPU(unsigned int, task_bp_pinned[HBP_NUM]); | 55 | static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[HBP_NUM]); |
56 | 56 | ||
57 | /* Number of non-pinned cpu/task breakpoints in a cpu */ | 57 | /* Number of non-pinned cpu/task breakpoints in a cpu */ |
58 | static DEFINE_PER_CPU(unsigned int, nr_bp_flexible); | 58 | static DEFINE_PER_CPU(unsigned int, nr_bp_flexible); |
@@ -73,7 +73,7 @@ static DEFINE_MUTEX(nr_bp_mutex); | |||
73 | static unsigned int max_task_bp_pinned(int cpu) | 73 | static unsigned int max_task_bp_pinned(int cpu) |
74 | { | 74 | { |
75 | int i; | 75 | int i; |
76 | unsigned int *tsk_pinned = per_cpu(task_bp_pinned, cpu); | 76 | unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned, cpu); |
77 | 77 | ||
78 | for (i = HBP_NUM -1; i >= 0; i--) { | 78 | for (i = HBP_NUM -1; i >= 0; i--) { |
79 | if (tsk_pinned[i] > 0) | 79 | if (tsk_pinned[i] > 0) |
@@ -83,15 +83,51 @@ static unsigned int max_task_bp_pinned(int cpu) | |||
83 | return 0; | 83 | return 0; |
84 | } | 84 | } |
85 | 85 | ||
86 | static int task_bp_pinned(struct task_struct *tsk) | ||
87 | { | ||
88 | struct perf_event_context *ctx = tsk->perf_event_ctxp; | ||
89 | struct list_head *list; | ||
90 | struct perf_event *bp; | ||
91 | unsigned long flags; | ||
92 | int count = 0; | ||
93 | |||
94 | if (WARN_ONCE(!ctx, "No perf context for this task")) | ||
95 | return 0; | ||
96 | |||
97 | list = &ctx->event_list; | ||
98 | |||
99 | spin_lock_irqsave(&ctx->lock, flags); | ||
100 | |||
101 | /* | ||
102 | * The current breakpoint counter is not included in the list | ||
103 | * at the open() callback time | ||
104 | */ | ||
105 | list_for_each_entry(bp, list, event_entry) { | ||
106 | if (bp->attr.type == PERF_TYPE_BREAKPOINT) | ||
107 | count++; | ||
108 | } | ||
109 | |||
110 | spin_unlock_irqrestore(&ctx->lock, flags); | ||
111 | |||
112 | return count; | ||
113 | } | ||
114 | |||
86 | /* | 115 | /* |
87 | * Report the number of pinned/un-pinned breakpoints we have in | 116 | * Report the number of pinned/un-pinned breakpoints we have in |
88 | * a given cpu (cpu > -1) or in all of them (cpu = -1). | 117 | * a given cpu (cpu > -1) or in all of them (cpu = -1). |
89 | */ | 118 | */ |
90 | static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu) | 119 | static void |
120 | fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp) | ||
91 | { | 121 | { |
122 | int cpu = bp->cpu; | ||
123 | struct task_struct *tsk = bp->ctx->task; | ||
124 | |||
92 | if (cpu >= 0) { | 125 | if (cpu >= 0) { |
93 | slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu); | 126 | slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu); |
94 | slots->pinned += max_task_bp_pinned(cpu); | 127 | if (!tsk) |
128 | slots->pinned += max_task_bp_pinned(cpu); | ||
129 | else | ||
130 | slots->pinned += task_bp_pinned(tsk); | ||
95 | slots->flexible = per_cpu(nr_bp_flexible, cpu); | 131 | slots->flexible = per_cpu(nr_bp_flexible, cpu); |
96 | 132 | ||
97 | return; | 133 | return; |
@@ -101,7 +137,10 @@ static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu) | |||
101 | unsigned int nr; | 137 | unsigned int nr; |
102 | 138 | ||
103 | nr = per_cpu(nr_cpu_bp_pinned, cpu); | 139 | nr = per_cpu(nr_cpu_bp_pinned, cpu); |
104 | nr += max_task_bp_pinned(cpu); | 140 | if (!tsk) |
141 | nr += max_task_bp_pinned(cpu); | ||
142 | else | ||
143 | nr += task_bp_pinned(tsk); | ||
105 | 144 | ||
106 | if (nr > slots->pinned) | 145 | if (nr > slots->pinned) |
107 | slots->pinned = nr; | 146 | slots->pinned = nr; |
@@ -118,35 +157,12 @@ static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu) | |||
118 | */ | 157 | */ |
119 | static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable) | 158 | static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable) |
120 | { | 159 | { |
121 | int count = 0; | ||
122 | struct perf_event *bp; | ||
123 | struct perf_event_context *ctx = tsk->perf_event_ctxp; | ||
124 | unsigned int *tsk_pinned; | 160 | unsigned int *tsk_pinned; |
125 | struct list_head *list; | 161 | int count = 0; |
126 | unsigned long flags; | ||
127 | |||
128 | if (WARN_ONCE(!ctx, "No perf context for this task")) | ||
129 | return; | ||
130 | |||
131 | list = &ctx->event_list; | ||
132 | |||
133 | spin_lock_irqsave(&ctx->lock, flags); | ||
134 | |||
135 | /* | ||
136 | * The current breakpoint counter is not included in the list | ||
137 | * at the open() callback time | ||
138 | */ | ||
139 | list_for_each_entry(bp, list, event_entry) { | ||
140 | if (bp->attr.type == PERF_TYPE_BREAKPOINT) | ||
141 | count++; | ||
142 | } | ||
143 | 162 | ||
144 | spin_unlock_irqrestore(&ctx->lock, flags); | 163 | count = task_bp_pinned(tsk); |
145 | 164 | ||
146 | if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list")) | 165 | tsk_pinned = per_cpu(nr_task_bp_pinned, cpu); |
147 | return; | ||
148 | |||
149 | tsk_pinned = per_cpu(task_bp_pinned, cpu); | ||
150 | if (enable) { | 166 | if (enable) { |
151 | tsk_pinned[count]++; | 167 | tsk_pinned[count]++; |
152 | if (count > 0) | 168 | if (count > 0) |
@@ -193,7 +209,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable) | |||
193 | * - If attached to a single cpu, check: | 209 | * - If attached to a single cpu, check: |
194 | * | 210 | * |
195 | * (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu) | 211 | * (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu) |
196 | * + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM | 212 | * + max(per_cpu(nr_task_bp_pinned, cpu)))) < HBP_NUM |
197 | * | 213 | * |
198 | * -> If there are already non-pinned counters in this cpu, it means | 214 | * -> If there are already non-pinned counters in this cpu, it means |
199 | * there is already a free slot for them. | 215 | * there is already a free slot for them. |
@@ -204,7 +220,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable) | |||
204 | * - If attached to every cpus, check: | 220 | * - If attached to every cpus, check: |
205 | * | 221 | * |
206 | * (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *)) | 222 | * (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *)) |
207 | * + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM | 223 | * + max(per_cpu(nr_task_bp_pinned, *)))) < HBP_NUM |
208 | * | 224 | * |
209 | * -> This is roughly the same, except we check the number of per cpu | 225 | * -> This is roughly the same, except we check the number of per cpu |
210 | * bp for every cpu and we keep the max one. Same for the per tasks | 226 | * bp for every cpu and we keep the max one. Same for the per tasks |
@@ -216,7 +232,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable) | |||
216 | * - If attached to a single cpu, check: | 232 | * - If attached to a single cpu, check: |
217 | * | 233 | * |
218 | * ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu) | 234 | * ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu) |
219 | * + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM | 235 | * + max(per_cpu(nr_task_bp_pinned, cpu))) < HBP_NUM |
220 | * | 236 | * |
221 | * -> Same checks as before. But now the nr_bp_flexible, if any, must keep | 237 | * -> Same checks as before. But now the nr_bp_flexible, if any, must keep |
222 | * one register at least (or they will never be fed). | 238 | * one register at least (or they will never be fed). |
@@ -224,7 +240,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable) | |||
224 | * - If attached to every cpus, check: | 240 | * - If attached to every cpus, check: |
225 | * | 241 | * |
226 | * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *)) | 242 | * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *)) |
227 | * + max(per_cpu(task_bp_pinned, *))) < HBP_NUM | 243 | * + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM |
228 | */ | 244 | */ |
229 | int reserve_bp_slot(struct perf_event *bp) | 245 | int reserve_bp_slot(struct perf_event *bp) |
230 | { | 246 | { |
@@ -233,7 +249,7 @@ int reserve_bp_slot(struct perf_event *bp) | |||
233 | 249 | ||
234 | mutex_lock(&nr_bp_mutex); | 250 | mutex_lock(&nr_bp_mutex); |
235 | 251 | ||
236 | fetch_bp_busy_slots(&slots, bp->cpu); | 252 | fetch_bp_busy_slots(&slots, bp); |
237 | 253 | ||
238 | /* Flexible counters need to keep at least one slot */ | 254 | /* Flexible counters need to keep at least one slot */ |
239 | if (slots.pinned + (!!slots.flexible) == HBP_NUM) { | 255 | if (slots.pinned + (!!slots.flexible) == HBP_NUM) { |
@@ -259,7 +275,7 @@ void release_bp_slot(struct perf_event *bp) | |||
259 | } | 275 | } |
260 | 276 | ||
261 | 277 | ||
262 | int __register_perf_hw_breakpoint(struct perf_event *bp) | 278 | int register_perf_hw_breakpoint(struct perf_event *bp) |
263 | { | 279 | { |
264 | int ret; | 280 | int ret; |
265 | 281 | ||
@@ -276,19 +292,12 @@ int __register_perf_hw_breakpoint(struct perf_event *bp) | |||
276 | * This is a quick hack that will be removed soon, once we remove | 292 | * This is a quick hack that will be removed soon, once we remove |
277 | * the tmp breakpoints from ptrace | 293 | * the tmp breakpoints from ptrace |
278 | */ | 294 | */ |
279 | if (!bp->attr.disabled || bp->callback == perf_bp_event) | 295 | if (!bp->attr.disabled || !bp->overflow_handler) |
280 | ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task); | 296 | ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task); |
281 | 297 | ||
282 | return ret; | 298 | return ret; |
283 | } | 299 | } |
284 | 300 | ||
285 | int register_perf_hw_breakpoint(struct perf_event *bp) | ||
286 | { | ||
287 | bp->callback = perf_bp_event; | ||
288 | |||
289 | return __register_perf_hw_breakpoint(bp); | ||
290 | } | ||
291 | |||
292 | /** | 301 | /** |
293 | * register_user_hw_breakpoint - register a hardware breakpoint for user space | 302 | * register_user_hw_breakpoint - register a hardware breakpoint for user space |
294 | * @attr: breakpoint attributes | 303 | * @attr: breakpoint attributes |
@@ -297,7 +306,7 @@ int register_perf_hw_breakpoint(struct perf_event *bp) | |||
297 | */ | 306 | */ |
298 | struct perf_event * | 307 | struct perf_event * |
299 | register_user_hw_breakpoint(struct perf_event_attr *attr, | 308 | register_user_hw_breakpoint(struct perf_event_attr *attr, |
300 | perf_callback_t triggered, | 309 | perf_overflow_handler_t triggered, |
301 | struct task_struct *tsk) | 310 | struct task_struct *tsk) |
302 | { | 311 | { |
303 | return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered); | 312 | return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered); |
@@ -311,19 +320,40 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); | |||
311 | * @triggered: callback to trigger when we hit the breakpoint | 320 | * @triggered: callback to trigger when we hit the breakpoint |
312 | * @tsk: pointer to 'task_struct' of the process to which the address belongs | 321 | * @tsk: pointer to 'task_struct' of the process to which the address belongs |
313 | */ | 322 | */ |
314 | struct perf_event * | 323 | int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr) |
315 | modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr, | ||
316 | perf_callback_t triggered, | ||
317 | struct task_struct *tsk) | ||
318 | { | 324 | { |
319 | /* | 325 | u64 old_addr = bp->attr.bp_addr; |
320 | * FIXME: do it without unregistering | 326 | int old_type = bp->attr.bp_type; |
321 | * - We don't want to lose our slot | 327 | int old_len = bp->attr.bp_len; |
322 | * - If the new bp is incorrect, don't lose the older one | 328 | int err = 0; |
323 | */ | ||
324 | unregister_hw_breakpoint(bp); | ||
325 | 329 | ||
326 | return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered); | 330 | perf_event_disable(bp); |
331 | |||
332 | bp->attr.bp_addr = attr->bp_addr; | ||
333 | bp->attr.bp_type = attr->bp_type; | ||
334 | bp->attr.bp_len = attr->bp_len; | ||
335 | |||
336 | if (attr->disabled) | ||
337 | goto end; | ||
338 | |||
339 | err = arch_validate_hwbkpt_settings(bp, bp->ctx->task); | ||
340 | if (!err) | ||
341 | perf_event_enable(bp); | ||
342 | |||
343 | if (err) { | ||
344 | bp->attr.bp_addr = old_addr; | ||
345 | bp->attr.bp_type = old_type; | ||
346 | bp->attr.bp_len = old_len; | ||
347 | if (!bp->attr.disabled) | ||
348 | perf_event_enable(bp); | ||
349 | |||
350 | return err; | ||
351 | } | ||
352 | |||
353 | end: | ||
354 | bp->attr.disabled = attr->disabled; | ||
355 | |||
356 | return 0; | ||
327 | } | 357 | } |
328 | EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); | 358 | EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); |
329 | 359 | ||
@@ -348,7 +378,7 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint); | |||
348 | */ | 378 | */ |
349 | struct perf_event ** | 379 | struct perf_event ** |
350 | register_wide_hw_breakpoint(struct perf_event_attr *attr, | 380 | register_wide_hw_breakpoint(struct perf_event_attr *attr, |
351 | perf_callback_t triggered) | 381 | perf_overflow_handler_t triggered) |
352 | { | 382 | { |
353 | struct perf_event **cpu_events, **pevent, *bp; | 383 | struct perf_event **cpu_events, **pevent, *bp; |
354 | long err; | 384 | long err; |
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 22b0a6eedf24..e49ea1c5232d 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c | |||
@@ -220,7 +220,7 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc, | |||
220 | /* | 220 | /* |
221 | * If we are seeing only the odd spurious IRQ caused by | 221 | * If we are seeing only the odd spurious IRQ caused by |
222 | * bus asynchronicity then don't eventually trigger an error, | 222 | * bus asynchronicity then don't eventually trigger an error, |
223 | * otherwise the couter becomes a doomsday timer for otherwise | 223 | * otherwise the counter becomes a doomsday timer for otherwise |
224 | * working systems | 224 | * working systems |
225 | */ | 225 | */ |
226 | if (time_after(jiffies, desc->last_unhandled + HZ/10)) | 226 | if (time_after(jiffies, desc->last_unhandled + HZ/10)) |
diff --git a/kernel/kgdb.c b/kernel/kgdb.c index 7d7014634022..2eb517e23514 100644 --- a/kernel/kgdb.c +++ b/kernel/kgdb.c | |||
@@ -129,6 +129,7 @@ struct task_struct *kgdb_usethread; | |||
129 | struct task_struct *kgdb_contthread; | 129 | struct task_struct *kgdb_contthread; |
130 | 130 | ||
131 | int kgdb_single_step; | 131 | int kgdb_single_step; |
132 | pid_t kgdb_sstep_pid; | ||
132 | 133 | ||
133 | /* Our I/O buffers. */ | 134 | /* Our I/O buffers. */ |
134 | static char remcom_in_buffer[BUFMAX]; | 135 | static char remcom_in_buffer[BUFMAX]; |
@@ -541,12 +542,17 @@ static struct task_struct *getthread(struct pt_regs *regs, int tid) | |||
541 | */ | 542 | */ |
542 | if (tid == 0 || tid == -1) | 543 | if (tid == 0 || tid == -1) |
543 | tid = -atomic_read(&kgdb_active) - 2; | 544 | tid = -atomic_read(&kgdb_active) - 2; |
544 | if (tid < 0) { | 545 | if (tid < -1 && tid > -NR_CPUS - 2) { |
545 | if (kgdb_info[-tid - 2].task) | 546 | if (kgdb_info[-tid - 2].task) |
546 | return kgdb_info[-tid - 2].task; | 547 | return kgdb_info[-tid - 2].task; |
547 | else | 548 | else |
548 | return idle_task(-tid - 2); | 549 | return idle_task(-tid - 2); |
549 | } | 550 | } |
551 | if (tid <= 0) { | ||
552 | printk(KERN_ERR "KGDB: Internal thread select error\n"); | ||
553 | dump_stack(); | ||
554 | return NULL; | ||
555 | } | ||
550 | 556 | ||
551 | /* | 557 | /* |
552 | * find_task_by_pid_ns() does not take the tasklist lock anymore | 558 | * find_task_by_pid_ns() does not take the tasklist lock anymore |
@@ -619,7 +625,8 @@ static void kgdb_flush_swbreak_addr(unsigned long addr) | |||
619 | static int kgdb_activate_sw_breakpoints(void) | 625 | static int kgdb_activate_sw_breakpoints(void) |
620 | { | 626 | { |
621 | unsigned long addr; | 627 | unsigned long addr; |
622 | int error = 0; | 628 | int error; |
629 | int ret = 0; | ||
623 | int i; | 630 | int i; |
624 | 631 | ||
625 | for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { | 632 | for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { |
@@ -629,13 +636,16 @@ static int kgdb_activate_sw_breakpoints(void) | |||
629 | addr = kgdb_break[i].bpt_addr; | 636 | addr = kgdb_break[i].bpt_addr; |
630 | error = kgdb_arch_set_breakpoint(addr, | 637 | error = kgdb_arch_set_breakpoint(addr, |
631 | kgdb_break[i].saved_instr); | 638 | kgdb_break[i].saved_instr); |
632 | if (error) | 639 | if (error) { |
633 | return error; | 640 | ret = error; |
641 | printk(KERN_INFO "KGDB: BP install failed: %lx", addr); | ||
642 | continue; | ||
643 | } | ||
634 | 644 | ||
635 | kgdb_flush_swbreak_addr(addr); | 645 | kgdb_flush_swbreak_addr(addr); |
636 | kgdb_break[i].state = BP_ACTIVE; | 646 | kgdb_break[i].state = BP_ACTIVE; |
637 | } | 647 | } |
638 | return 0; | 648 | return ret; |
639 | } | 649 | } |
640 | 650 | ||
641 | static int kgdb_set_sw_break(unsigned long addr) | 651 | static int kgdb_set_sw_break(unsigned long addr) |
@@ -682,7 +692,8 @@ static int kgdb_set_sw_break(unsigned long addr) | |||
682 | static int kgdb_deactivate_sw_breakpoints(void) | 692 | static int kgdb_deactivate_sw_breakpoints(void) |
683 | { | 693 | { |
684 | unsigned long addr; | 694 | unsigned long addr; |
685 | int error = 0; | 695 | int error; |
696 | int ret = 0; | ||
686 | int i; | 697 | int i; |
687 | 698 | ||
688 | for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { | 699 | for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { |
@@ -691,13 +702,15 @@ static int kgdb_deactivate_sw_breakpoints(void) | |||
691 | addr = kgdb_break[i].bpt_addr; | 702 | addr = kgdb_break[i].bpt_addr; |
692 | error = kgdb_arch_remove_breakpoint(addr, | 703 | error = kgdb_arch_remove_breakpoint(addr, |
693 | kgdb_break[i].saved_instr); | 704 | kgdb_break[i].saved_instr); |
694 | if (error) | 705 | if (error) { |
695 | return error; | 706 | printk(KERN_INFO "KGDB: BP remove failed: %lx\n", addr); |
707 | ret = error; | ||
708 | } | ||
696 | 709 | ||
697 | kgdb_flush_swbreak_addr(addr); | 710 | kgdb_flush_swbreak_addr(addr); |
698 | kgdb_break[i].state = BP_SET; | 711 | kgdb_break[i].state = BP_SET; |
699 | } | 712 | } |
700 | return 0; | 713 | return ret; |
701 | } | 714 | } |
702 | 715 | ||
703 | static int kgdb_remove_sw_break(unsigned long addr) | 716 | static int kgdb_remove_sw_break(unsigned long addr) |
@@ -1204,8 +1217,10 @@ static int gdb_cmd_exception_pass(struct kgdb_state *ks) | |||
1204 | return 1; | 1217 | return 1; |
1205 | 1218 | ||
1206 | } else { | 1219 | } else { |
1207 | error_packet(remcom_out_buffer, -EINVAL); | 1220 | kgdb_msg_write("KGDB only knows signal 9 (pass)" |
1208 | return 0; | 1221 | " and 15 (pass and disconnect)\n" |
1222 | "Executing a continue without signal passing\n", 0); | ||
1223 | remcom_in_buffer[0] = 'c'; | ||
1209 | } | 1224 | } |
1210 | 1225 | ||
1211 | /* Indicate fall through */ | 1226 | /* Indicate fall through */ |
@@ -1395,6 +1410,7 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs) | |||
1395 | struct kgdb_state kgdb_var; | 1410 | struct kgdb_state kgdb_var; |
1396 | struct kgdb_state *ks = &kgdb_var; | 1411 | struct kgdb_state *ks = &kgdb_var; |
1397 | unsigned long flags; | 1412 | unsigned long flags; |
1413 | int sstep_tries = 100; | ||
1398 | int error = 0; | 1414 | int error = 0; |
1399 | int i, cpu; | 1415 | int i, cpu; |
1400 | 1416 | ||
@@ -1425,13 +1441,14 @@ acquirelock: | |||
1425 | cpu_relax(); | 1441 | cpu_relax(); |
1426 | 1442 | ||
1427 | /* | 1443 | /* |
1428 | * Do not start the debugger connection on this CPU if the last | 1444 | * For single stepping, try to only enter on the processor |
1429 | * instance of the exception handler wanted to come into the | 1445 | * that was single stepping. To guard against a deadlock, the |
1430 | * debugger on a different CPU via a single step | 1446 | * kernel will only try for the value of sstep_tries before |
1447 | * giving up and continuing on. | ||
1431 | */ | 1448 | */ |
1432 | if (atomic_read(&kgdb_cpu_doing_single_step) != -1 && | 1449 | if (atomic_read(&kgdb_cpu_doing_single_step) != -1 && |
1433 | atomic_read(&kgdb_cpu_doing_single_step) != cpu) { | 1450 | (kgdb_info[cpu].task && |
1434 | 1451 | kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) { | |
1435 | atomic_set(&kgdb_active, -1); | 1452 | atomic_set(&kgdb_active, -1); |
1436 | touch_softlockup_watchdog(); | 1453 | touch_softlockup_watchdog(); |
1437 | clocksource_touch_watchdog(); | 1454 | clocksource_touch_watchdog(); |
@@ -1524,6 +1541,13 @@ acquirelock: | |||
1524 | } | 1541 | } |
1525 | 1542 | ||
1526 | kgdb_restore: | 1543 | kgdb_restore: |
1544 | if (atomic_read(&kgdb_cpu_doing_single_step) != -1) { | ||
1545 | int sstep_cpu = atomic_read(&kgdb_cpu_doing_single_step); | ||
1546 | if (kgdb_info[sstep_cpu].task) | ||
1547 | kgdb_sstep_pid = kgdb_info[sstep_cpu].task->pid; | ||
1548 | else | ||
1549 | kgdb_sstep_pid = 0; | ||
1550 | } | ||
1527 | /* Free kgdb_active */ | 1551 | /* Free kgdb_active */ |
1528 | atomic_set(&kgdb_active, -1); | 1552 | atomic_set(&kgdb_active, -1); |
1529 | touch_softlockup_watchdog(); | 1553 | touch_softlockup_watchdog(); |
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index f5dcd36d3151..4f8df01dbe51 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
@@ -168,7 +168,7 @@ static void lock_time_inc(struct lock_time *lt, u64 time) | |||
168 | if (time > lt->max) | 168 | if (time > lt->max) |
169 | lt->max = time; | 169 | lt->max = time; |
170 | 170 | ||
171 | if (time < lt->min || !lt->min) | 171 | if (time < lt->min || !lt->nr) |
172 | lt->min = time; | 172 | lt->min = time; |
173 | 173 | ||
174 | lt->total += time; | 174 | lt->total += time; |
@@ -177,8 +177,15 @@ static void lock_time_inc(struct lock_time *lt, u64 time) | |||
177 | 177 | ||
178 | static inline void lock_time_add(struct lock_time *src, struct lock_time *dst) | 178 | static inline void lock_time_add(struct lock_time *src, struct lock_time *dst) |
179 | { | 179 | { |
180 | dst->min += src->min; | 180 | if (!src->nr) |
181 | dst->max += src->max; | 181 | return; |
182 | |||
183 | if (src->max > dst->max) | ||
184 | dst->max = src->max; | ||
185 | |||
186 | if (src->min < dst->min || !dst->nr) | ||
187 | dst->min = src->min; | ||
188 | |||
182 | dst->total += src->total; | 189 | dst->total += src->total; |
183 | dst->nr += src->nr; | 190 | dst->nr += src->nr; |
184 | } | 191 | } |
@@ -379,7 +386,8 @@ static int save_trace(struct stack_trace *trace) | |||
379 | * complete trace that maxes out the entries provided will be reported | 386 | * complete trace that maxes out the entries provided will be reported |
380 | * as incomplete, friggin useless </rant> | 387 | * as incomplete, friggin useless </rant> |
381 | */ | 388 | */ |
382 | if (trace->entries[trace->nr_entries-1] == ULONG_MAX) | 389 | if (trace->nr_entries != 0 && |
390 | trace->entries[trace->nr_entries-1] == ULONG_MAX) | ||
383 | trace->nr_entries--; | 391 | trace->nr_entries--; |
384 | 392 | ||
385 | trace->max_entries = trace->nr_entries; | 393 | trace->max_entries = trace->nr_entries; |
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 6b7ddba1dd64..e73e53c7582f 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
@@ -36,7 +36,7 @@ | |||
36 | /* | 36 | /* |
37 | * Each CPU has a list of per CPU events: | 37 | * Each CPU has a list of per CPU events: |
38 | */ | 38 | */ |
39 | DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); | 39 | static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); |
40 | 40 | ||
41 | int perf_max_events __read_mostly = 1; | 41 | int perf_max_events __read_mostly = 1; |
42 | static int perf_reserved_percpu __read_mostly; | 42 | static int perf_reserved_percpu __read_mostly; |
@@ -476,7 +476,7 @@ static void perf_event_remove_from_context(struct perf_event *event) | |||
476 | if (!task) { | 476 | if (!task) { |
477 | /* | 477 | /* |
478 | * Per cpu events are removed via an smp call and | 478 | * Per cpu events are removed via an smp call and |
479 | * the removal is always sucessful. | 479 | * the removal is always successful. |
480 | */ | 480 | */ |
481 | smp_call_function_single(event->cpu, | 481 | smp_call_function_single(event->cpu, |
482 | __perf_event_remove_from_context, | 482 | __perf_event_remove_from_context, |
@@ -567,7 +567,7 @@ static void __perf_event_disable(void *info) | |||
567 | * is the current context on this CPU and preemption is disabled, | 567 | * is the current context on this CPU and preemption is disabled, |
568 | * hence we can't get into perf_event_task_sched_out for this context. | 568 | * hence we can't get into perf_event_task_sched_out for this context. |
569 | */ | 569 | */ |
570 | static void perf_event_disable(struct perf_event *event) | 570 | void perf_event_disable(struct perf_event *event) |
571 | { | 571 | { |
572 | struct perf_event_context *ctx = event->ctx; | 572 | struct perf_event_context *ctx = event->ctx; |
573 | struct task_struct *task = ctx->task; | 573 | struct task_struct *task = ctx->task; |
@@ -845,7 +845,7 @@ perf_install_in_context(struct perf_event_context *ctx, | |||
845 | if (!task) { | 845 | if (!task) { |
846 | /* | 846 | /* |
847 | * Per cpu events are installed via an smp call and | 847 | * Per cpu events are installed via an smp call and |
848 | * the install is always sucessful. | 848 | * the install is always successful. |
849 | */ | 849 | */ |
850 | smp_call_function_single(cpu, __perf_install_in_context, | 850 | smp_call_function_single(cpu, __perf_install_in_context, |
851 | event, 1); | 851 | event, 1); |
@@ -971,7 +971,7 @@ static void __perf_event_enable(void *info) | |||
971 | * perf_event_for_each_child or perf_event_for_each as described | 971 | * perf_event_for_each_child or perf_event_for_each as described |
972 | * for perf_event_disable. | 972 | * for perf_event_disable. |
973 | */ | 973 | */ |
974 | static void perf_event_enable(struct perf_event *event) | 974 | void perf_event_enable(struct perf_event *event) |
975 | { | 975 | { |
976 | struct perf_event_context *ctx = event->ctx; | 976 | struct perf_event_context *ctx = event->ctx; |
977 | struct task_struct *task = ctx->task; | 977 | struct task_struct *task = ctx->task; |
@@ -1579,7 +1579,6 @@ static void | |||
1579 | __perf_event_init_context(struct perf_event_context *ctx, | 1579 | __perf_event_init_context(struct perf_event_context *ctx, |
1580 | struct task_struct *task) | 1580 | struct task_struct *task) |
1581 | { | 1581 | { |
1582 | memset(ctx, 0, sizeof(*ctx)); | ||
1583 | spin_lock_init(&ctx->lock); | 1582 | spin_lock_init(&ctx->lock); |
1584 | mutex_init(&ctx->mutex); | 1583 | mutex_init(&ctx->mutex); |
1585 | INIT_LIST_HEAD(&ctx->group_list); | 1584 | INIT_LIST_HEAD(&ctx->group_list); |
@@ -1654,7 +1653,7 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu) | |||
1654 | } | 1653 | } |
1655 | 1654 | ||
1656 | if (!ctx) { | 1655 | if (!ctx) { |
1657 | ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL); | 1656 | ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL); |
1658 | err = -ENOMEM; | 1657 | err = -ENOMEM; |
1659 | if (!ctx) | 1658 | if (!ctx) |
1660 | goto errout; | 1659 | goto errout; |
@@ -4011,6 +4010,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) | |||
4011 | event->pmu->read(event); | 4010 | event->pmu->read(event); |
4012 | 4011 | ||
4013 | data.addr = 0; | 4012 | data.addr = 0; |
4013 | data.raw = NULL; | ||
4014 | data.period = event->hw.last_period; | 4014 | data.period = event->hw.last_period; |
4015 | regs = get_irq_regs(); | 4015 | regs = get_irq_regs(); |
4016 | /* | 4016 | /* |
@@ -4080,8 +4080,7 @@ static void cpu_clock_perf_event_update(struct perf_event *event) | |||
4080 | u64 now; | 4080 | u64 now; |
4081 | 4081 | ||
4082 | now = cpu_clock(cpu); | 4082 | now = cpu_clock(cpu); |
4083 | prev = atomic64_read(&event->hw.prev_count); | 4083 | prev = atomic64_xchg(&event->hw.prev_count, now); |
4084 | atomic64_set(&event->hw.prev_count, now); | ||
4085 | atomic64_add(now - prev, &event->count); | 4084 | atomic64_add(now - prev, &event->count); |
4086 | } | 4085 | } |
4087 | 4086 | ||
@@ -4286,15 +4285,8 @@ static void bp_perf_event_destroy(struct perf_event *event) | |||
4286 | static const struct pmu *bp_perf_event_init(struct perf_event *bp) | 4285 | static const struct pmu *bp_perf_event_init(struct perf_event *bp) |
4287 | { | 4286 | { |
4288 | int err; | 4287 | int err; |
4289 | /* | 4288 | |
4290 | * The breakpoint is already filled if we haven't created the counter | 4289 | err = register_perf_hw_breakpoint(bp); |
4291 | * through perf syscall | ||
4292 | * FIXME: manage to get trigerred to NULL if it comes from syscalls | ||
4293 | */ | ||
4294 | if (!bp->callback) | ||
4295 | err = register_perf_hw_breakpoint(bp); | ||
4296 | else | ||
4297 | err = __register_perf_hw_breakpoint(bp); | ||
4298 | if (err) | 4290 | if (err) |
4299 | return ERR_PTR(err); | 4291 | return ERR_PTR(err); |
4300 | 4292 | ||
@@ -4308,6 +4300,7 @@ void perf_bp_event(struct perf_event *bp, void *data) | |||
4308 | struct perf_sample_data sample; | 4300 | struct perf_sample_data sample; |
4309 | struct pt_regs *regs = data; | 4301 | struct pt_regs *regs = data; |
4310 | 4302 | ||
4303 | sample.raw = NULL; | ||
4311 | sample.addr = bp->attr.bp_addr; | 4304 | sample.addr = bp->attr.bp_addr; |
4312 | 4305 | ||
4313 | if (!perf_exclude_event(bp, regs)) | 4306 | if (!perf_exclude_event(bp, regs)) |
@@ -4390,7 +4383,7 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
4390 | struct perf_event_context *ctx, | 4383 | struct perf_event_context *ctx, |
4391 | struct perf_event *group_leader, | 4384 | struct perf_event *group_leader, |
4392 | struct perf_event *parent_event, | 4385 | struct perf_event *parent_event, |
4393 | perf_callback_t callback, | 4386 | perf_overflow_handler_t overflow_handler, |
4394 | gfp_t gfpflags) | 4387 | gfp_t gfpflags) |
4395 | { | 4388 | { |
4396 | const struct pmu *pmu; | 4389 | const struct pmu *pmu; |
@@ -4433,10 +4426,10 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
4433 | 4426 | ||
4434 | event->state = PERF_EVENT_STATE_INACTIVE; | 4427 | event->state = PERF_EVENT_STATE_INACTIVE; |
4435 | 4428 | ||
4436 | if (!callback && parent_event) | 4429 | if (!overflow_handler && parent_event) |
4437 | callback = parent_event->callback; | 4430 | overflow_handler = parent_event->overflow_handler; |
4438 | 4431 | ||
4439 | event->callback = callback; | 4432 | event->overflow_handler = overflow_handler; |
4440 | 4433 | ||
4441 | if (attr->disabled) | 4434 | if (attr->disabled) |
4442 | event->state = PERF_EVENT_STATE_OFF; | 4435 | event->state = PERF_EVENT_STATE_OFF; |
@@ -4776,7 +4769,8 @@ err_put_context: | |||
4776 | */ | 4769 | */ |
4777 | struct perf_event * | 4770 | struct perf_event * |
4778 | perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | 4771 | perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, |
4779 | pid_t pid, perf_callback_t callback) | 4772 | pid_t pid, |
4773 | perf_overflow_handler_t overflow_handler) | ||
4780 | { | 4774 | { |
4781 | struct perf_event *event; | 4775 | struct perf_event *event; |
4782 | struct perf_event_context *ctx; | 4776 | struct perf_event_context *ctx; |
@@ -4793,7 +4787,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | |||
4793 | } | 4787 | } |
4794 | 4788 | ||
4795 | event = perf_event_alloc(attr, cpu, ctx, NULL, | 4789 | event = perf_event_alloc(attr, cpu, ctx, NULL, |
4796 | NULL, callback, GFP_KERNEL); | 4790 | NULL, overflow_handler, GFP_KERNEL); |
4797 | if (IS_ERR(event)) { | 4791 | if (IS_ERR(event)) { |
4798 | err = PTR_ERR(event); | 4792 | err = PTR_ERR(event); |
4799 | goto err_put_context; | 4793 | goto err_put_context; |
@@ -5090,7 +5084,7 @@ again: | |||
5090 | */ | 5084 | */ |
5091 | int perf_event_init_task(struct task_struct *child) | 5085 | int perf_event_init_task(struct task_struct *child) |
5092 | { | 5086 | { |
5093 | struct perf_event_context *child_ctx, *parent_ctx; | 5087 | struct perf_event_context *child_ctx = NULL, *parent_ctx; |
5094 | struct perf_event_context *cloned_ctx; | 5088 | struct perf_event_context *cloned_ctx; |
5095 | struct perf_event *event; | 5089 | struct perf_event *event; |
5096 | struct task_struct *parent = current; | 5090 | struct task_struct *parent = current; |
@@ -5106,20 +5100,6 @@ int perf_event_init_task(struct task_struct *child) | |||
5106 | return 0; | 5100 | return 0; |
5107 | 5101 | ||
5108 | /* | 5102 | /* |
5109 | * This is executed from the parent task context, so inherit | ||
5110 | * events that have been marked for cloning. | ||
5111 | * First allocate and initialize a context for the child. | ||
5112 | */ | ||
5113 | |||
5114 | child_ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL); | ||
5115 | if (!child_ctx) | ||
5116 | return -ENOMEM; | ||
5117 | |||
5118 | __perf_event_init_context(child_ctx, child); | ||
5119 | child->perf_event_ctxp = child_ctx; | ||
5120 | get_task_struct(child); | ||
5121 | |||
5122 | /* | ||
5123 | * If the parent's context is a clone, pin it so it won't get | 5103 | * If the parent's context is a clone, pin it so it won't get |
5124 | * swapped under us. | 5104 | * swapped under us. |
5125 | */ | 5105 | */ |
@@ -5149,6 +5129,26 @@ int perf_event_init_task(struct task_struct *child) | |||
5149 | continue; | 5129 | continue; |
5150 | } | 5130 | } |
5151 | 5131 | ||
5132 | if (!child->perf_event_ctxp) { | ||
5133 | /* | ||
5134 | * This is executed from the parent task context, so | ||
5135 | * inherit events that have been marked for cloning. | ||
5136 | * First allocate and initialize a context for the | ||
5137 | * child. | ||
5138 | */ | ||
5139 | |||
5140 | child_ctx = kzalloc(sizeof(struct perf_event_context), | ||
5141 | GFP_KERNEL); | ||
5142 | if (!child_ctx) { | ||
5143 | ret = -ENOMEM; | ||
5144 | goto exit; | ||
5145 | } | ||
5146 | |||
5147 | __perf_event_init_context(child_ctx, child); | ||
5148 | child->perf_event_ctxp = child_ctx; | ||
5149 | get_task_struct(child); | ||
5150 | } | ||
5151 | |||
5152 | ret = inherit_group(event, parent, parent_ctx, | 5152 | ret = inherit_group(event, parent, parent_ctx, |
5153 | child, child_ctx); | 5153 | child, child_ctx); |
5154 | if (ret) { | 5154 | if (ret) { |
@@ -5177,6 +5177,7 @@ int perf_event_init_task(struct task_struct *child) | |||
5177 | get_ctx(child_ctx->parent_ctx); | 5177 | get_ctx(child_ctx->parent_ctx); |
5178 | } | 5178 | } |
5179 | 5179 | ||
5180 | exit: | ||
5180 | mutex_unlock(&parent_ctx->mutex); | 5181 | mutex_unlock(&parent_ctx->mutex); |
5181 | 5182 | ||
5182 | perf_unpin_context(parent_ctx); | 5183 | perf_unpin_context(parent_ctx); |
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index dfdec524d1b7..3db49b9ca374 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c | |||
@@ -29,7 +29,6 @@ | |||
29 | 29 | ||
30 | #include <linux/pm_qos_params.h> | 30 | #include <linux/pm_qos_params.h> |
31 | #include <linux/sched.h> | 31 | #include <linux/sched.h> |
32 | #include <linux/smp_lock.h> | ||
33 | #include <linux/spinlock.h> | 32 | #include <linux/spinlock.h> |
34 | #include <linux/slab.h> | 33 | #include <linux/slab.h> |
35 | #include <linux/time.h> | 34 | #include <linux/time.h> |
@@ -344,37 +343,33 @@ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier) | |||
344 | } | 343 | } |
345 | EXPORT_SYMBOL_GPL(pm_qos_remove_notifier); | 344 | EXPORT_SYMBOL_GPL(pm_qos_remove_notifier); |
346 | 345 | ||
347 | #define PID_NAME_LEN sizeof("process_1234567890") | 346 | #define PID_NAME_LEN 32 |
348 | static char name[PID_NAME_LEN]; | ||
349 | 347 | ||
350 | static int pm_qos_power_open(struct inode *inode, struct file *filp) | 348 | static int pm_qos_power_open(struct inode *inode, struct file *filp) |
351 | { | 349 | { |
352 | int ret; | 350 | int ret; |
353 | long pm_qos_class; | 351 | long pm_qos_class; |
352 | char name[PID_NAME_LEN]; | ||
354 | 353 | ||
355 | lock_kernel(); | ||
356 | pm_qos_class = find_pm_qos_object_by_minor(iminor(inode)); | 354 | pm_qos_class = find_pm_qos_object_by_minor(iminor(inode)); |
357 | if (pm_qos_class >= 0) { | 355 | if (pm_qos_class >= 0) { |
358 | filp->private_data = (void *)pm_qos_class; | 356 | filp->private_data = (void *)pm_qos_class; |
359 | sprintf(name, "process_%d", current->pid); | 357 | snprintf(name, PID_NAME_LEN, "process_%d", current->pid); |
360 | ret = pm_qos_add_requirement(pm_qos_class, name, | 358 | ret = pm_qos_add_requirement(pm_qos_class, name, |
361 | PM_QOS_DEFAULT_VALUE); | 359 | PM_QOS_DEFAULT_VALUE); |
362 | if (ret >= 0) { | 360 | if (ret >= 0) |
363 | unlock_kernel(); | ||
364 | return 0; | 361 | return 0; |
365 | } | ||
366 | } | 362 | } |
367 | unlock_kernel(); | ||
368 | |||
369 | return -EPERM; | 363 | return -EPERM; |
370 | } | 364 | } |
371 | 365 | ||
372 | static int pm_qos_power_release(struct inode *inode, struct file *filp) | 366 | static int pm_qos_power_release(struct inode *inode, struct file *filp) |
373 | { | 367 | { |
374 | int pm_qos_class; | 368 | int pm_qos_class; |
369 | char name[PID_NAME_LEN]; | ||
375 | 370 | ||
376 | pm_qos_class = (long)filp->private_data; | 371 | pm_qos_class = (long)filp->private_data; |
377 | sprintf(name, "process_%d", current->pid); | 372 | snprintf(name, PID_NAME_LEN, "process_%d", current->pid); |
378 | pm_qos_remove_requirement(pm_qos_class, name); | 373 | pm_qos_remove_requirement(pm_qos_class, name); |
379 | 374 | ||
380 | return 0; | 375 | return 0; |
@@ -385,13 +380,14 @@ static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, | |||
385 | { | 380 | { |
386 | s32 value; | 381 | s32 value; |
387 | int pm_qos_class; | 382 | int pm_qos_class; |
383 | char name[PID_NAME_LEN]; | ||
388 | 384 | ||
389 | pm_qos_class = (long)filp->private_data; | 385 | pm_qos_class = (long)filp->private_data; |
390 | if (count != sizeof(s32)) | 386 | if (count != sizeof(s32)) |
391 | return -EINVAL; | 387 | return -EINVAL; |
392 | if (copy_from_user(&value, buf, sizeof(s32))) | 388 | if (copy_from_user(&value, buf, sizeof(s32))) |
393 | return -EFAULT; | 389 | return -EFAULT; |
394 | sprintf(name, "process_%d", current->pid); | 390 | snprintf(name, PID_NAME_LEN, "process_%d", current->pid); |
395 | pm_qos_update_requirement(pm_qos_class, name, value); | 391 | pm_qos_update_requirement(pm_qos_class, name, value); |
396 | 392 | ||
397 | return sizeof(s32); | 393 | return sizeof(s32); |
diff --git a/kernel/resource.c b/kernel/resource.c index fb11a58b9594..dc15686b7a77 100644 --- a/kernel/resource.c +++ b/kernel/resource.c | |||
@@ -308,35 +308,37 @@ static int find_resource(struct resource *root, struct resource *new, | |||
308 | void *alignf_data) | 308 | void *alignf_data) |
309 | { | 309 | { |
310 | struct resource *this = root->child; | 310 | struct resource *this = root->child; |
311 | resource_size_t start, end; | ||
311 | 312 | ||
312 | new->start = root->start; | 313 | start = root->start; |
313 | /* | 314 | /* |
314 | * Skip past an allocated resource that starts at 0, since the assignment | 315 | * Skip past an allocated resource that starts at 0, since the assignment |
315 | * of this->start - 1 to new->end below would cause an underflow. | 316 | * of this->start - 1 to new->end below would cause an underflow. |
316 | */ | 317 | */ |
317 | if (this && this->start == 0) { | 318 | if (this && this->start == 0) { |
318 | new->start = this->end + 1; | 319 | start = this->end + 1; |
319 | this = this->sibling; | 320 | this = this->sibling; |
320 | } | 321 | } |
321 | for(;;) { | 322 | for(;;) { |
322 | if (this) | 323 | if (this) |
323 | new->end = this->start - 1; | 324 | end = this->start - 1; |
324 | else | 325 | else |
325 | new->end = root->end; | 326 | end = root->end; |
326 | if (new->start < min) | 327 | if (start < min) |
327 | new->start = min; | 328 | start = min; |
328 | if (new->end > max) | 329 | if (end > max) |
329 | new->end = max; | 330 | end = max; |
330 | new->start = ALIGN(new->start, align); | 331 | start = ALIGN(start, align); |
331 | if (alignf) | 332 | if (alignf) |
332 | alignf(alignf_data, new, size, align); | 333 | alignf(alignf_data, new, size, align); |
333 | if (new->start < new->end && new->end - new->start >= size - 1) { | 334 | if (start < end && end - start >= size - 1) { |
334 | new->end = new->start + size - 1; | 335 | new->start = start; |
336 | new->end = start + size - 1; | ||
335 | return 0; | 337 | return 0; |
336 | } | 338 | } |
337 | if (!this) | 339 | if (!this) |
338 | break; | 340 | break; |
339 | new->start = this->end + 1; | 341 | start = this->end + 1; |
340 | this = this->sibling; | 342 | this = this->sibling; |
341 | } | 343 | } |
342 | return -EBUSY; | 344 | return -EBUSY; |
diff --git a/kernel/sched.c b/kernel/sched.c index e7f2cfa6a257..ff39cadf621e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -814,6 +814,7 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32; | |||
814 | * default: 0.25ms | 814 | * default: 0.25ms |
815 | */ | 815 | */ |
816 | unsigned int sysctl_sched_shares_ratelimit = 250000; | 816 | unsigned int sysctl_sched_shares_ratelimit = 250000; |
817 | unsigned int normalized_sysctl_sched_shares_ratelimit = 250000; | ||
817 | 818 | ||
818 | /* | 819 | /* |
819 | * Inject some fuzzyness into changing the per-cpu group shares | 820 | * Inject some fuzzyness into changing the per-cpu group shares |
@@ -1614,7 +1615,7 @@ static void update_group_shares_cpu(struct task_group *tg, int cpu, | |||
1614 | */ | 1615 | */ |
1615 | static int tg_shares_up(struct task_group *tg, void *data) | 1616 | static int tg_shares_up(struct task_group *tg, void *data) |
1616 | { | 1617 | { |
1617 | unsigned long weight, rq_weight = 0, shares = 0; | 1618 | unsigned long weight, rq_weight = 0, sum_weight = 0, shares = 0; |
1618 | unsigned long *usd_rq_weight; | 1619 | unsigned long *usd_rq_weight; |
1619 | struct sched_domain *sd = data; | 1620 | struct sched_domain *sd = data; |
1620 | unsigned long flags; | 1621 | unsigned long flags; |
@@ -1630,6 +1631,7 @@ static int tg_shares_up(struct task_group *tg, void *data) | |||
1630 | weight = tg->cfs_rq[i]->load.weight; | 1631 | weight = tg->cfs_rq[i]->load.weight; |
1631 | usd_rq_weight[i] = weight; | 1632 | usd_rq_weight[i] = weight; |
1632 | 1633 | ||
1634 | rq_weight += weight; | ||
1633 | /* | 1635 | /* |
1634 | * If there are currently no tasks on the cpu pretend there | 1636 | * If there are currently no tasks on the cpu pretend there |
1635 | * is one of average load so that when a new task gets to | 1637 | * is one of average load so that when a new task gets to |
@@ -1638,10 +1640,13 @@ static int tg_shares_up(struct task_group *tg, void *data) | |||
1638 | if (!weight) | 1640 | if (!weight) |
1639 | weight = NICE_0_LOAD; | 1641 | weight = NICE_0_LOAD; |
1640 | 1642 | ||
1641 | rq_weight += weight; | 1643 | sum_weight += weight; |
1642 | shares += tg->cfs_rq[i]->shares; | 1644 | shares += tg->cfs_rq[i]->shares; |
1643 | } | 1645 | } |
1644 | 1646 | ||
1647 | if (!rq_weight) | ||
1648 | rq_weight = sum_weight; | ||
1649 | |||
1645 | if ((!shares && rq_weight) || shares > tg->shares) | 1650 | if ((!shares && rq_weight) || shares > tg->shares) |
1646 | shares = tg->shares; | 1651 | shares = tg->shares; |
1647 | 1652 | ||
@@ -1810,6 +1815,22 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) | |||
1810 | #endif | 1815 | #endif |
1811 | 1816 | ||
1812 | static void calc_load_account_active(struct rq *this_rq); | 1817 | static void calc_load_account_active(struct rq *this_rq); |
1818 | static void update_sysctl(void); | ||
1819 | static int get_update_sysctl_factor(void); | ||
1820 | |||
1821 | static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | ||
1822 | { | ||
1823 | set_task_rq(p, cpu); | ||
1824 | #ifdef CONFIG_SMP | ||
1825 | /* | ||
1826 | * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be | ||
1827 | * successfully executed on another CPU. We must ensure that updates of | ||
1828 | * per-task data have been completed by this moment. | ||
1829 | */ | ||
1830 | smp_wmb(); | ||
1831 | task_thread_info(p)->cpu = cpu; | ||
1832 | #endif | ||
1833 | } | ||
1813 | 1834 | ||
1814 | #include "sched_stats.h" | 1835 | #include "sched_stats.h" |
1815 | #include "sched_idletask.c" | 1836 | #include "sched_idletask.c" |
@@ -1967,20 +1988,6 @@ inline int task_curr(const struct task_struct *p) | |||
1967 | return cpu_curr(task_cpu(p)) == p; | 1988 | return cpu_curr(task_cpu(p)) == p; |
1968 | } | 1989 | } |
1969 | 1990 | ||
1970 | static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | ||
1971 | { | ||
1972 | set_task_rq(p, cpu); | ||
1973 | #ifdef CONFIG_SMP | ||
1974 | /* | ||
1975 | * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be | ||
1976 | * successfuly executed on another CPU. We must ensure that updates of | ||
1977 | * per-task data have been completed by this moment. | ||
1978 | */ | ||
1979 | smp_wmb(); | ||
1980 | task_thread_info(p)->cpu = cpu; | ||
1981 | #endif | ||
1982 | } | ||
1983 | |||
1984 | static inline void check_class_changed(struct rq *rq, struct task_struct *p, | 1991 | static inline void check_class_changed(struct rq *rq, struct task_struct *p, |
1985 | const struct sched_class *prev_class, | 1992 | const struct sched_class *prev_class, |
1986 | int oldprio, int running) | 1993 | int oldprio, int running) |
@@ -2060,29 +2067,13 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) | |||
2060 | void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | 2067 | void set_task_cpu(struct task_struct *p, unsigned int new_cpu) |
2061 | { | 2068 | { |
2062 | int old_cpu = task_cpu(p); | 2069 | int old_cpu = task_cpu(p); |
2063 | struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu); | ||
2064 | struct cfs_rq *old_cfsrq = task_cfs_rq(p), | 2070 | struct cfs_rq *old_cfsrq = task_cfs_rq(p), |
2065 | *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); | 2071 | *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); |
2066 | u64 clock_offset; | ||
2067 | |||
2068 | clock_offset = old_rq->clock - new_rq->clock; | ||
2069 | 2072 | ||
2070 | trace_sched_migrate_task(p, new_cpu); | 2073 | trace_sched_migrate_task(p, new_cpu); |
2071 | 2074 | ||
2072 | #ifdef CONFIG_SCHEDSTATS | ||
2073 | if (p->se.wait_start) | ||
2074 | p->se.wait_start -= clock_offset; | ||
2075 | if (p->se.sleep_start) | ||
2076 | p->se.sleep_start -= clock_offset; | ||
2077 | if (p->se.block_start) | ||
2078 | p->se.block_start -= clock_offset; | ||
2079 | #endif | ||
2080 | if (old_cpu != new_cpu) { | 2075 | if (old_cpu != new_cpu) { |
2081 | p->se.nr_migrations++; | 2076 | p->se.nr_migrations++; |
2082 | #ifdef CONFIG_SCHEDSTATS | ||
2083 | if (task_hot(p, old_rq->clock, NULL)) | ||
2084 | schedstat_inc(p, se.nr_forced2_migrations); | ||
2085 | #endif | ||
2086 | perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, | 2077 | perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, |
2087 | 1, 1, NULL, 0); | 2078 | 1, 1, NULL, 0); |
2088 | } | 2079 | } |
@@ -2323,6 +2314,14 @@ void task_oncpu_function_call(struct task_struct *p, | |||
2323 | preempt_enable(); | 2314 | preempt_enable(); |
2324 | } | 2315 | } |
2325 | 2316 | ||
2317 | #ifdef CONFIG_SMP | ||
2318 | static inline | ||
2319 | int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) | ||
2320 | { | ||
2321 | return p->sched_class->select_task_rq(p, sd_flags, wake_flags); | ||
2322 | } | ||
2323 | #endif | ||
2324 | |||
2326 | /*** | 2325 | /*** |
2327 | * try_to_wake_up - wake up a thread | 2326 | * try_to_wake_up - wake up a thread |
2328 | * @p: the to-be-woken-up thread | 2327 | * @p: the to-be-woken-up thread |
@@ -2374,17 +2373,14 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, | |||
2374 | if (task_contributes_to_load(p)) | 2373 | if (task_contributes_to_load(p)) |
2375 | rq->nr_uninterruptible--; | 2374 | rq->nr_uninterruptible--; |
2376 | p->state = TASK_WAKING; | 2375 | p->state = TASK_WAKING; |
2377 | task_rq_unlock(rq, &flags); | 2376 | __task_rq_unlock(rq); |
2378 | 2377 | ||
2379 | cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags); | 2378 | cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); |
2380 | if (cpu != orig_cpu) { | 2379 | if (cpu != orig_cpu) |
2381 | local_irq_save(flags); | ||
2382 | rq = cpu_rq(cpu); | ||
2383 | update_rq_clock(rq); | ||
2384 | set_task_cpu(p, cpu); | 2380 | set_task_cpu(p, cpu); |
2385 | local_irq_restore(flags); | 2381 | |
2386 | } | 2382 | rq = __task_rq_lock(p); |
2387 | rq = task_rq_lock(p, &flags); | 2383 | update_rq_clock(rq); |
2388 | 2384 | ||
2389 | WARN_ON(p->state != TASK_WAKING); | 2385 | WARN_ON(p->state != TASK_WAKING); |
2390 | cpu = task_cpu(p); | 2386 | cpu = task_cpu(p); |
@@ -2499,7 +2495,6 @@ static void __sched_fork(struct task_struct *p) | |||
2499 | p->se.avg_overlap = 0; | 2495 | p->se.avg_overlap = 0; |
2500 | p->se.start_runtime = 0; | 2496 | p->se.start_runtime = 0; |
2501 | p->se.avg_wakeup = sysctl_sched_wakeup_granularity; | 2497 | p->se.avg_wakeup = sysctl_sched_wakeup_granularity; |
2502 | p->se.avg_running = 0; | ||
2503 | 2498 | ||
2504 | #ifdef CONFIG_SCHEDSTATS | 2499 | #ifdef CONFIG_SCHEDSTATS |
2505 | p->se.wait_start = 0; | 2500 | p->se.wait_start = 0; |
@@ -2521,7 +2516,6 @@ static void __sched_fork(struct task_struct *p) | |||
2521 | p->se.nr_failed_migrations_running = 0; | 2516 | p->se.nr_failed_migrations_running = 0; |
2522 | p->se.nr_failed_migrations_hot = 0; | 2517 | p->se.nr_failed_migrations_hot = 0; |
2523 | p->se.nr_forced_migrations = 0; | 2518 | p->se.nr_forced_migrations = 0; |
2524 | p->se.nr_forced2_migrations = 0; | ||
2525 | 2519 | ||
2526 | p->se.nr_wakeups = 0; | 2520 | p->se.nr_wakeups = 0; |
2527 | p->se.nr_wakeups_sync = 0; | 2521 | p->se.nr_wakeups_sync = 0; |
@@ -2558,7 +2552,6 @@ static void __sched_fork(struct task_struct *p) | |||
2558 | void sched_fork(struct task_struct *p, int clone_flags) | 2552 | void sched_fork(struct task_struct *p, int clone_flags) |
2559 | { | 2553 | { |
2560 | int cpu = get_cpu(); | 2554 | int cpu = get_cpu(); |
2561 | unsigned long flags; | ||
2562 | 2555 | ||
2563 | __sched_fork(p); | 2556 | __sched_fork(p); |
2564 | 2557 | ||
@@ -2592,13 +2585,13 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
2592 | if (!rt_prio(p->prio)) | 2585 | if (!rt_prio(p->prio)) |
2593 | p->sched_class = &fair_sched_class; | 2586 | p->sched_class = &fair_sched_class; |
2594 | 2587 | ||
2588 | if (p->sched_class->task_fork) | ||
2589 | p->sched_class->task_fork(p); | ||
2590 | |||
2595 | #ifdef CONFIG_SMP | 2591 | #ifdef CONFIG_SMP |
2596 | cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0); | 2592 | cpu = select_task_rq(p, SD_BALANCE_FORK, 0); |
2597 | #endif | 2593 | #endif |
2598 | local_irq_save(flags); | ||
2599 | update_rq_clock(cpu_rq(cpu)); | ||
2600 | set_task_cpu(p, cpu); | 2594 | set_task_cpu(p, cpu); |
2601 | local_irq_restore(flags); | ||
2602 | 2595 | ||
2603 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | 2596 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
2604 | if (likely(sched_info_on())) | 2597 | if (likely(sched_info_on())) |
@@ -2631,17 +2624,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
2631 | rq = task_rq_lock(p, &flags); | 2624 | rq = task_rq_lock(p, &flags); |
2632 | BUG_ON(p->state != TASK_RUNNING); | 2625 | BUG_ON(p->state != TASK_RUNNING); |
2633 | update_rq_clock(rq); | 2626 | update_rq_clock(rq); |
2634 | 2627 | activate_task(rq, p, 0); | |
2635 | if (!p->sched_class->task_new || !current->se.on_rq) { | ||
2636 | activate_task(rq, p, 0); | ||
2637 | } else { | ||
2638 | /* | ||
2639 | * Let the scheduling class do new task startup | ||
2640 | * management (if any): | ||
2641 | */ | ||
2642 | p->sched_class->task_new(rq, p); | ||
2643 | inc_nr_running(rq); | ||
2644 | } | ||
2645 | trace_sched_wakeup_new(rq, p, 1); | 2628 | trace_sched_wakeup_new(rq, p, 1); |
2646 | check_preempt_curr(rq, p, WF_FORK); | 2629 | check_preempt_curr(rq, p, WF_FORK); |
2647 | #ifdef CONFIG_SMP | 2630 | #ifdef CONFIG_SMP |
@@ -3156,7 +3139,7 @@ out: | |||
3156 | void sched_exec(void) | 3139 | void sched_exec(void) |
3157 | { | 3140 | { |
3158 | int new_cpu, this_cpu = get_cpu(); | 3141 | int new_cpu, this_cpu = get_cpu(); |
3159 | new_cpu = current->sched_class->select_task_rq(current, SD_BALANCE_EXEC, 0); | 3142 | new_cpu = select_task_rq(current, SD_BALANCE_EXEC, 0); |
3160 | put_cpu(); | 3143 | put_cpu(); |
3161 | if (new_cpu != this_cpu) | 3144 | if (new_cpu != this_cpu) |
3162 | sched_migrate_task(current, new_cpu); | 3145 | sched_migrate_task(current, new_cpu); |
@@ -3172,10 +3155,6 @@ static void pull_task(struct rq *src_rq, struct task_struct *p, | |||
3172 | deactivate_task(src_rq, p, 0); | 3155 | deactivate_task(src_rq, p, 0); |
3173 | set_task_cpu(p, this_cpu); | 3156 | set_task_cpu(p, this_cpu); |
3174 | activate_task(this_rq, p, 0); | 3157 | activate_task(this_rq, p, 0); |
3175 | /* | ||
3176 | * Note that idle threads have a prio of MAX_PRIO, for this test | ||
3177 | * to be always true for them. | ||
3178 | */ | ||
3179 | check_preempt_curr(this_rq, p, 0); | 3158 | check_preempt_curr(this_rq, p, 0); |
3180 | } | 3159 | } |
3181 | 3160 | ||
@@ -4134,7 +4113,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
4134 | unsigned long flags; | 4113 | unsigned long flags; |
4135 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); | 4114 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); |
4136 | 4115 | ||
4137 | cpumask_copy(cpus, cpu_online_mask); | 4116 | cpumask_copy(cpus, cpu_active_mask); |
4138 | 4117 | ||
4139 | /* | 4118 | /* |
4140 | * When power savings policy is enabled for the parent domain, idle | 4119 | * When power savings policy is enabled for the parent domain, idle |
@@ -4297,7 +4276,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) | |||
4297 | int all_pinned = 0; | 4276 | int all_pinned = 0; |
4298 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); | 4277 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); |
4299 | 4278 | ||
4300 | cpumask_copy(cpus, cpu_online_mask); | 4279 | cpumask_copy(cpus, cpu_active_mask); |
4301 | 4280 | ||
4302 | /* | 4281 | /* |
4303 | * When power savings policy is enabled for the parent domain, idle | 4282 | * When power savings policy is enabled for the parent domain, idle |
@@ -4694,7 +4673,7 @@ int select_nohz_load_balancer(int stop_tick) | |||
4694 | cpumask_set_cpu(cpu, nohz.cpu_mask); | 4673 | cpumask_set_cpu(cpu, nohz.cpu_mask); |
4695 | 4674 | ||
4696 | /* time for ilb owner also to sleep */ | 4675 | /* time for ilb owner also to sleep */ |
4697 | if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { | 4676 | if (cpumask_weight(nohz.cpu_mask) == num_active_cpus()) { |
4698 | if (atomic_read(&nohz.load_balancer) == cpu) | 4677 | if (atomic_read(&nohz.load_balancer) == cpu) |
4699 | atomic_set(&nohz.load_balancer, -1); | 4678 | atomic_set(&nohz.load_balancer, -1); |
4700 | return 0; | 4679 | return 0; |
@@ -5396,13 +5375,14 @@ static inline void schedule_debug(struct task_struct *prev) | |||
5396 | #endif | 5375 | #endif |
5397 | } | 5376 | } |
5398 | 5377 | ||
5399 | static void put_prev_task(struct rq *rq, struct task_struct *p) | 5378 | static void put_prev_task(struct rq *rq, struct task_struct *prev) |
5400 | { | 5379 | { |
5401 | u64 runtime = p->se.sum_exec_runtime - p->se.prev_sum_exec_runtime; | 5380 | if (prev->state == TASK_RUNNING) { |
5381 | u64 runtime = prev->se.sum_exec_runtime; | ||
5402 | 5382 | ||
5403 | update_avg(&p->se.avg_running, runtime); | 5383 | runtime -= prev->se.prev_sum_exec_runtime; |
5384 | runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost); | ||
5404 | 5385 | ||
5405 | if (p->state == TASK_RUNNING) { | ||
5406 | /* | 5386 | /* |
5407 | * In order to avoid avg_overlap growing stale when we are | 5387 | * In order to avoid avg_overlap growing stale when we are |
5408 | * indeed overlapping and hence not getting put to sleep, grow | 5388 | * indeed overlapping and hence not getting put to sleep, grow |
@@ -5412,12 +5392,9 @@ static void put_prev_task(struct rq *rq, struct task_struct *p) | |||
5412 | * correlates to the amount of cache footprint a task can | 5392 | * correlates to the amount of cache footprint a task can |
5413 | * build up. | 5393 | * build up. |
5414 | */ | 5394 | */ |
5415 | runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost); | 5395 | update_avg(&prev->se.avg_overlap, runtime); |
5416 | update_avg(&p->se.avg_overlap, runtime); | ||
5417 | } else { | ||
5418 | update_avg(&p->se.avg_running, 0); | ||
5419 | } | 5396 | } |
5420 | p->sched_class->put_prev_task(rq, p); | 5397 | prev->sched_class->put_prev_task(rq, prev); |
5421 | } | 5398 | } |
5422 | 5399 | ||
5423 | /* | 5400 | /* |
@@ -6631,6 +6608,8 @@ SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, | |||
6631 | long sched_getaffinity(pid_t pid, struct cpumask *mask) | 6608 | long sched_getaffinity(pid_t pid, struct cpumask *mask) |
6632 | { | 6609 | { |
6633 | struct task_struct *p; | 6610 | struct task_struct *p; |
6611 | unsigned long flags; | ||
6612 | struct rq *rq; | ||
6634 | int retval; | 6613 | int retval; |
6635 | 6614 | ||
6636 | get_online_cpus(); | 6615 | get_online_cpus(); |
@@ -6645,7 +6624,9 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) | |||
6645 | if (retval) | 6624 | if (retval) |
6646 | goto out_unlock; | 6625 | goto out_unlock; |
6647 | 6626 | ||
6627 | rq = task_rq_lock(p, &flags); | ||
6648 | cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); | 6628 | cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); |
6629 | task_rq_unlock(rq, &flags); | ||
6649 | 6630 | ||
6650 | out_unlock: | 6631 | out_unlock: |
6651 | read_unlock(&tasklist_lock); | 6632 | read_unlock(&tasklist_lock); |
@@ -6883,6 +6864,8 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, | |||
6883 | { | 6864 | { |
6884 | struct task_struct *p; | 6865 | struct task_struct *p; |
6885 | unsigned int time_slice; | 6866 | unsigned int time_slice; |
6867 | unsigned long flags; | ||
6868 | struct rq *rq; | ||
6886 | int retval; | 6869 | int retval; |
6887 | struct timespec t; | 6870 | struct timespec t; |
6888 | 6871 | ||
@@ -6899,7 +6882,9 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, | |||
6899 | if (retval) | 6882 | if (retval) |
6900 | goto out_unlock; | 6883 | goto out_unlock; |
6901 | 6884 | ||
6902 | time_slice = p->sched_class->get_rr_interval(p); | 6885 | rq = task_rq_lock(p, &flags); |
6886 | time_slice = p->sched_class->get_rr_interval(rq, p); | ||
6887 | task_rq_unlock(rq, &flags); | ||
6903 | 6888 | ||
6904 | read_unlock(&tasklist_lock); | 6889 | read_unlock(&tasklist_lock); |
6905 | jiffies_to_timespec(time_slice, &t); | 6890 | jiffies_to_timespec(time_slice, &t); |
@@ -7000,7 +6985,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
7000 | __sched_fork(idle); | 6985 | __sched_fork(idle); |
7001 | idle->se.exec_start = sched_clock(); | 6986 | idle->se.exec_start = sched_clock(); |
7002 | 6987 | ||
7003 | idle->prio = idle->normal_prio = MAX_PRIO; | ||
7004 | cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); | 6988 | cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); |
7005 | __set_task_cpu(idle, cpu); | 6989 | __set_task_cpu(idle, cpu); |
7006 | 6990 | ||
@@ -7041,22 +7025,43 @@ cpumask_var_t nohz_cpu_mask; | |||
7041 | * | 7025 | * |
7042 | * This idea comes from the SD scheduler of Con Kolivas: | 7026 | * This idea comes from the SD scheduler of Con Kolivas: |
7043 | */ | 7027 | */ |
7044 | static inline void sched_init_granularity(void) | 7028 | static int get_update_sysctl_factor(void) |
7045 | { | 7029 | { |
7046 | unsigned int factor = 1 + ilog2(num_online_cpus()); | 7030 | unsigned int cpus = min_t(int, num_online_cpus(), 8); |
7047 | const unsigned long limit = 200000000; | 7031 | unsigned int factor; |
7032 | |||
7033 | switch (sysctl_sched_tunable_scaling) { | ||
7034 | case SCHED_TUNABLESCALING_NONE: | ||
7035 | factor = 1; | ||
7036 | break; | ||
7037 | case SCHED_TUNABLESCALING_LINEAR: | ||
7038 | factor = cpus; | ||
7039 | break; | ||
7040 | case SCHED_TUNABLESCALING_LOG: | ||
7041 | default: | ||
7042 | factor = 1 + ilog2(cpus); | ||
7043 | break; | ||
7044 | } | ||
7048 | 7045 | ||
7049 | sysctl_sched_min_granularity *= factor; | 7046 | return factor; |
7050 | if (sysctl_sched_min_granularity > limit) | 7047 | } |
7051 | sysctl_sched_min_granularity = limit; | ||
7052 | 7048 | ||
7053 | sysctl_sched_latency *= factor; | 7049 | static void update_sysctl(void) |
7054 | if (sysctl_sched_latency > limit) | 7050 | { |
7055 | sysctl_sched_latency = limit; | 7051 | unsigned int factor = get_update_sysctl_factor(); |
7056 | 7052 | ||
7057 | sysctl_sched_wakeup_granularity *= factor; | 7053 | #define SET_SYSCTL(name) \ |
7054 | (sysctl_##name = (factor) * normalized_sysctl_##name) | ||
7055 | SET_SYSCTL(sched_min_granularity); | ||
7056 | SET_SYSCTL(sched_latency); | ||
7057 | SET_SYSCTL(sched_wakeup_granularity); | ||
7058 | SET_SYSCTL(sched_shares_ratelimit); | ||
7059 | #undef SET_SYSCTL | ||
7060 | } | ||
7058 | 7061 | ||
7059 | sysctl_sched_shares_ratelimit *= factor; | 7062 | static inline void sched_init_granularity(void) |
7063 | { | ||
7064 | update_sysctl(); | ||
7060 | } | 7065 | } |
7061 | 7066 | ||
7062 | #ifdef CONFIG_SMP | 7067 | #ifdef CONFIG_SMP |
@@ -7093,7 +7098,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) | |||
7093 | int ret = 0; | 7098 | int ret = 0; |
7094 | 7099 | ||
7095 | rq = task_rq_lock(p, &flags); | 7100 | rq = task_rq_lock(p, &flags); |
7096 | if (!cpumask_intersects(new_mask, cpu_online_mask)) { | 7101 | if (!cpumask_intersects(new_mask, cpu_active_mask)) { |
7097 | ret = -EINVAL; | 7102 | ret = -EINVAL; |
7098 | goto out; | 7103 | goto out; |
7099 | } | 7104 | } |
@@ -7115,7 +7120,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) | |||
7115 | if (cpumask_test_cpu(task_cpu(p), new_mask)) | 7120 | if (cpumask_test_cpu(task_cpu(p), new_mask)) |
7116 | goto out; | 7121 | goto out; |
7117 | 7122 | ||
7118 | if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) { | 7123 | if (migrate_task(p, cpumask_any_and(cpu_active_mask, new_mask), &req)) { |
7119 | /* Need help from migration thread: drop lock and wait. */ | 7124 | /* Need help from migration thread: drop lock and wait. */ |
7120 | struct task_struct *mt = rq->migration_thread; | 7125 | struct task_struct *mt = rq->migration_thread; |
7121 | 7126 | ||
@@ -7269,19 +7274,19 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) | |||
7269 | 7274 | ||
7270 | again: | 7275 | again: |
7271 | /* Look for allowed, online CPU in same node. */ | 7276 | /* Look for allowed, online CPU in same node. */ |
7272 | for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask) | 7277 | for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) |
7273 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) | 7278 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) |
7274 | goto move; | 7279 | goto move; |
7275 | 7280 | ||
7276 | /* Any allowed, online CPU? */ | 7281 | /* Any allowed, online CPU? */ |
7277 | dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask); | 7282 | dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask); |
7278 | if (dest_cpu < nr_cpu_ids) | 7283 | if (dest_cpu < nr_cpu_ids) |
7279 | goto move; | 7284 | goto move; |
7280 | 7285 | ||
7281 | /* No more Mr. Nice Guy. */ | 7286 | /* No more Mr. Nice Guy. */ |
7282 | if (dest_cpu >= nr_cpu_ids) { | 7287 | if (dest_cpu >= nr_cpu_ids) { |
7283 | cpuset_cpus_allowed_locked(p, &p->cpus_allowed); | 7288 | cpuset_cpus_allowed_locked(p, &p->cpus_allowed); |
7284 | dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed); | 7289 | dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed); |
7285 | 7290 | ||
7286 | /* | 7291 | /* |
7287 | * Don't tell them about moving exiting tasks or | 7292 | * Don't tell them about moving exiting tasks or |
@@ -7310,7 +7315,7 @@ move: | |||
7310 | */ | 7315 | */ |
7311 | static void migrate_nr_uninterruptible(struct rq *rq_src) | 7316 | static void migrate_nr_uninterruptible(struct rq *rq_src) |
7312 | { | 7317 | { |
7313 | struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask)); | 7318 | struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask)); |
7314 | unsigned long flags; | 7319 | unsigned long flags; |
7315 | 7320 | ||
7316 | local_irq_save(flags); | 7321 | local_irq_save(flags); |
@@ -7563,7 +7568,7 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu) | |||
7563 | static struct ctl_table_header *sd_sysctl_header; | 7568 | static struct ctl_table_header *sd_sysctl_header; |
7564 | static void register_sched_domain_sysctl(void) | 7569 | static void register_sched_domain_sysctl(void) |
7565 | { | 7570 | { |
7566 | int i, cpu_num = num_online_cpus(); | 7571 | int i, cpu_num = num_possible_cpus(); |
7567 | struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1); | 7572 | struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1); |
7568 | char buf[32]; | 7573 | char buf[32]; |
7569 | 7574 | ||
@@ -7573,7 +7578,7 @@ static void register_sched_domain_sysctl(void) | |||
7573 | if (entry == NULL) | 7578 | if (entry == NULL) |
7574 | return; | 7579 | return; |
7575 | 7580 | ||
7576 | for_each_online_cpu(i) { | 7581 | for_each_possible_cpu(i) { |
7577 | snprintf(buf, 32, "cpu%d", i); | 7582 | snprintf(buf, 32, "cpu%d", i); |
7578 | entry->procname = kstrdup(buf, GFP_KERNEL); | 7583 | entry->procname = kstrdup(buf, GFP_KERNEL); |
7579 | entry->mode = 0555; | 7584 | entry->mode = 0555; |
@@ -7703,7 +7708,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
7703 | spin_lock_irq(&rq->lock); | 7708 | spin_lock_irq(&rq->lock); |
7704 | update_rq_clock(rq); | 7709 | update_rq_clock(rq); |
7705 | deactivate_task(rq, rq->idle, 0); | 7710 | deactivate_task(rq, rq->idle, 0); |
7706 | rq->idle->static_prio = MAX_PRIO; | ||
7707 | __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); | 7711 | __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); |
7708 | rq->idle->sched_class = &idle_sched_class; | 7712 | rq->idle->sched_class = &idle_sched_class; |
7709 | migrate_dead_tasks(cpu); | 7713 | migrate_dead_tasks(cpu); |
@@ -9099,7 +9103,7 @@ match1: | |||
9099 | if (doms_new == NULL) { | 9103 | if (doms_new == NULL) { |
9100 | ndoms_cur = 0; | 9104 | ndoms_cur = 0; |
9101 | doms_new = &fallback_doms; | 9105 | doms_new = &fallback_doms; |
9102 | cpumask_andnot(doms_new[0], cpu_online_mask, cpu_isolated_map); | 9106 | cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map); |
9103 | WARN_ON_ONCE(dattr_new); | 9107 | WARN_ON_ONCE(dattr_new); |
9104 | } | 9108 | } |
9105 | 9109 | ||
@@ -9230,8 +9234,10 @@ static int update_sched_domains(struct notifier_block *nfb, | |||
9230 | switch (action) { | 9234 | switch (action) { |
9231 | case CPU_ONLINE: | 9235 | case CPU_ONLINE: |
9232 | case CPU_ONLINE_FROZEN: | 9236 | case CPU_ONLINE_FROZEN: |
9233 | case CPU_DEAD: | 9237 | case CPU_DOWN_PREPARE: |
9234 | case CPU_DEAD_FROZEN: | 9238 | case CPU_DOWN_PREPARE_FROZEN: |
9239 | case CPU_DOWN_FAILED: | ||
9240 | case CPU_DOWN_FAILED_FROZEN: | ||
9235 | partition_sched_domains(1, NULL, NULL); | 9241 | partition_sched_domains(1, NULL, NULL); |
9236 | return NOTIFY_OK; | 9242 | return NOTIFY_OK; |
9237 | 9243 | ||
@@ -9278,7 +9284,7 @@ void __init sched_init_smp(void) | |||
9278 | #endif | 9284 | #endif |
9279 | get_online_cpus(); | 9285 | get_online_cpus(); |
9280 | mutex_lock(&sched_domains_mutex); | 9286 | mutex_lock(&sched_domains_mutex); |
9281 | arch_init_sched_domains(cpu_online_mask); | 9287 | arch_init_sched_domains(cpu_active_mask); |
9282 | cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); | 9288 | cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); |
9283 | if (cpumask_empty(non_isolated_cpus)) | 9289 | if (cpumask_empty(non_isolated_cpus)) |
9284 | cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); | 9290 | cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); |
@@ -9842,13 +9848,15 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | |||
9842 | se = kzalloc_node(sizeof(struct sched_entity), | 9848 | se = kzalloc_node(sizeof(struct sched_entity), |
9843 | GFP_KERNEL, cpu_to_node(i)); | 9849 | GFP_KERNEL, cpu_to_node(i)); |
9844 | if (!se) | 9850 | if (!se) |
9845 | goto err; | 9851 | goto err_free_rq; |
9846 | 9852 | ||
9847 | init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]); | 9853 | init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]); |
9848 | } | 9854 | } |
9849 | 9855 | ||
9850 | return 1; | 9856 | return 1; |
9851 | 9857 | ||
9858 | err_free_rq: | ||
9859 | kfree(cfs_rq); | ||
9852 | err: | 9860 | err: |
9853 | return 0; | 9861 | return 0; |
9854 | } | 9862 | } |
@@ -9930,13 +9938,15 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) | |||
9930 | rt_se = kzalloc_node(sizeof(struct sched_rt_entity), | 9938 | rt_se = kzalloc_node(sizeof(struct sched_rt_entity), |
9931 | GFP_KERNEL, cpu_to_node(i)); | 9939 | GFP_KERNEL, cpu_to_node(i)); |
9932 | if (!rt_se) | 9940 | if (!rt_se) |
9933 | goto err; | 9941 | goto err_free_rq; |
9934 | 9942 | ||
9935 | init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]); | 9943 | init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]); |
9936 | } | 9944 | } |
9937 | 9945 | ||
9938 | return 1; | 9946 | return 1; |
9939 | 9947 | ||
9948 | err_free_rq: | ||
9949 | kfree(rt_rq); | ||
9940 | err: | 9950 | err: |
9941 | return 0; | 9951 | return 0; |
9942 | } | 9952 | } |
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 6988cf08f705..5ae24fc65d75 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c | |||
@@ -309,6 +309,12 @@ static void print_cpu(struct seq_file *m, int cpu) | |||
309 | print_rq(m, rq, cpu); | 309 | print_rq(m, rq, cpu); |
310 | } | 310 | } |
311 | 311 | ||
312 | static const char *sched_tunable_scaling_names[] = { | ||
313 | "none", | ||
314 | "logaritmic", | ||
315 | "linear" | ||
316 | }; | ||
317 | |||
312 | static int sched_debug_show(struct seq_file *m, void *v) | 318 | static int sched_debug_show(struct seq_file *m, void *v) |
313 | { | 319 | { |
314 | u64 now = ktime_to_ns(ktime_get()); | 320 | u64 now = ktime_to_ns(ktime_get()); |
@@ -334,6 +340,10 @@ static int sched_debug_show(struct seq_file *m, void *v) | |||
334 | #undef PN | 340 | #undef PN |
335 | #undef P | 341 | #undef P |
336 | 342 | ||
343 | SEQ_printf(m, " .%-40s: %d (%s)\n", "sysctl_sched_tunable_scaling", | ||
344 | sysctl_sched_tunable_scaling, | ||
345 | sched_tunable_scaling_names[sysctl_sched_tunable_scaling]); | ||
346 | |||
337 | for_each_online_cpu(cpu) | 347 | for_each_online_cpu(cpu) |
338 | print_cpu(m, cpu); | 348 | print_cpu(m, cpu); |
339 | 349 | ||
@@ -399,7 +409,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) | |||
399 | PN(se.sum_exec_runtime); | 409 | PN(se.sum_exec_runtime); |
400 | PN(se.avg_overlap); | 410 | PN(se.avg_overlap); |
401 | PN(se.avg_wakeup); | 411 | PN(se.avg_wakeup); |
402 | PN(se.avg_running); | ||
403 | 412 | ||
404 | nr_switches = p->nvcsw + p->nivcsw; | 413 | nr_switches = p->nvcsw + p->nivcsw; |
405 | 414 | ||
@@ -423,7 +432,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) | |||
423 | P(se.nr_failed_migrations_running); | 432 | P(se.nr_failed_migrations_running); |
424 | P(se.nr_failed_migrations_hot); | 433 | P(se.nr_failed_migrations_hot); |
425 | P(se.nr_forced_migrations); | 434 | P(se.nr_forced_migrations); |
426 | P(se.nr_forced2_migrations); | ||
427 | P(se.nr_wakeups); | 435 | P(se.nr_wakeups); |
428 | P(se.nr_wakeups_sync); | 436 | P(se.nr_wakeups_sync); |
429 | P(se.nr_wakeups_migrate); | 437 | P(se.nr_wakeups_migrate); |
@@ -499,7 +507,6 @@ void proc_sched_set_task(struct task_struct *p) | |||
499 | p->se.nr_failed_migrations_running = 0; | 507 | p->se.nr_failed_migrations_running = 0; |
500 | p->se.nr_failed_migrations_hot = 0; | 508 | p->se.nr_failed_migrations_hot = 0; |
501 | p->se.nr_forced_migrations = 0; | 509 | p->se.nr_forced_migrations = 0; |
502 | p->se.nr_forced2_migrations = 0; | ||
503 | p->se.nr_wakeups = 0; | 510 | p->se.nr_wakeups = 0; |
504 | p->se.nr_wakeups_sync = 0; | 511 | p->se.nr_wakeups_sync = 0; |
505 | p->se.nr_wakeups_migrate = 0; | 512 | p->se.nr_wakeups_migrate = 0; |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index f61837ad336d..804a411838f1 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -21,6 +21,7 @@ | |||
21 | */ | 21 | */ |
22 | 22 | ||
23 | #include <linux/latencytop.h> | 23 | #include <linux/latencytop.h> |
24 | #include <linux/sched.h> | ||
24 | 25 | ||
25 | /* | 26 | /* |
26 | * Targeted preemption latency for CPU-bound tasks: | 27 | * Targeted preemption latency for CPU-bound tasks: |
@@ -35,12 +36,26 @@ | |||
35 | * run vmstat and monitor the context-switches (cs) field) | 36 | * run vmstat and monitor the context-switches (cs) field) |
36 | */ | 37 | */ |
37 | unsigned int sysctl_sched_latency = 5000000ULL; | 38 | unsigned int sysctl_sched_latency = 5000000ULL; |
39 | unsigned int normalized_sysctl_sched_latency = 5000000ULL; | ||
40 | |||
41 | /* | ||
42 | * The initial- and re-scaling of tunables is configurable | ||
43 | * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))) | |
44 | * | |
45 | * Options are: | |
46 | * SCHED_TUNABLESCALING_NONE - unscaled, always *1 | |
47 | * SCHED_TUNABLESCALING_LOG - scaled logarithmically, *(1+ilog(ncpus)) | |
48 | * SCHED_TUNABLESCALING_LINEAR - scaled linearly, *ncpus | |
49 | */ | ||
50 | enum sched_tunable_scaling sysctl_sched_tunable_scaling | ||
51 | = SCHED_TUNABLESCALING_LOG; | ||
38 | 52 | ||
39 | /* | 53 | /* |
40 | * Minimal preemption granularity for CPU-bound tasks: | 54 | * Minimal preemption granularity for CPU-bound tasks: |
41 | * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) | 55 | * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) |
42 | */ | 56 | */ |
43 | unsigned int sysctl_sched_min_granularity = 1000000ULL; | 57 | unsigned int sysctl_sched_min_granularity = 1000000ULL; |
58 | unsigned int normalized_sysctl_sched_min_granularity = 1000000ULL; | ||
44 | 59 | ||
45 | /* | 60 | /* |
46 | * is kept at sysctl_sched_latency / sysctl_sched_min_granularity | 61 | * is kept at sysctl_sched_latency / sysctl_sched_min_granularity |
@@ -70,6 +85,7 @@ unsigned int __read_mostly sysctl_sched_compat_yield; | |||
70 | * have immediate wakeup/sleep latencies. | 85 | * have immediate wakeup/sleep latencies. |
71 | */ | 86 | */ |
72 | unsigned int sysctl_sched_wakeup_granularity = 1000000UL; | 87 | unsigned int sysctl_sched_wakeup_granularity = 1000000UL; |
88 | unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; | ||
73 | 89 | ||
74 | const_debug unsigned int sysctl_sched_migration_cost = 500000UL; | 90 | const_debug unsigned int sysctl_sched_migration_cost = 500000UL; |
75 | 91 | ||
@@ -383,11 +399,12 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) | |||
383 | */ | 399 | */ |
384 | 400 | ||
385 | #ifdef CONFIG_SCHED_DEBUG | 401 | #ifdef CONFIG_SCHED_DEBUG |
386 | int sched_nr_latency_handler(struct ctl_table *table, int write, | 402 | int sched_proc_update_handler(struct ctl_table *table, int write, |
387 | void __user *buffer, size_t *lenp, | 403 | void __user *buffer, size_t *lenp, |
388 | loff_t *ppos) | 404 | loff_t *ppos) |
389 | { | 405 | { |
390 | int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); | 406 | int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
407 | int factor = get_update_sysctl_factor(); | ||
391 | 408 | ||
392 | if (ret || !write) | 409 | if (ret || !write) |
393 | return ret; | 410 | return ret; |
@@ -395,6 +412,14 @@ int sched_nr_latency_handler(struct ctl_table *table, int write, | |||
395 | sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency, | 412 | sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency, |
396 | sysctl_sched_min_granularity); | 413 | sysctl_sched_min_granularity); |
397 | 414 | ||
415 | #define WRT_SYSCTL(name) \ | ||
416 | (normalized_sysctl_##name = sysctl_##name / (factor)) | ||
417 | WRT_SYSCTL(sched_min_granularity); | ||
418 | WRT_SYSCTL(sched_latency); | ||
419 | WRT_SYSCTL(sched_wakeup_granularity); | ||
420 | WRT_SYSCTL(sched_shares_ratelimit); | ||
421 | #undef WRT_SYSCTL | ||
422 | |||
398 | return 0; | 423 | return 0; |
399 | } | 424 | } |
400 | #endif | 425 | #endif |
@@ -1403,7 +1428,6 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag | |||
1403 | new_cpu = prev_cpu; | 1428 | new_cpu = prev_cpu; |
1404 | } | 1429 | } |
1405 | 1430 | ||
1406 | rcu_read_lock(); | ||
1407 | for_each_domain(cpu, tmp) { | 1431 | for_each_domain(cpu, tmp) { |
1408 | /* | 1432 | /* |
1409 | * If power savings logic is enabled for a domain, see if we | 1433 | * If power savings logic is enabled for a domain, see if we |
@@ -1484,10 +1508,8 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag | |||
1484 | update_shares(tmp); | 1508 | update_shares(tmp); |
1485 | } | 1509 | } |
1486 | 1510 | ||
1487 | if (affine_sd && wake_affine(affine_sd, p, sync)) { | 1511 | if (affine_sd && wake_affine(affine_sd, p, sync)) |
1488 | new_cpu = cpu; | 1512 | return cpu; |
1489 | goto out; | ||
1490 | } | ||
1491 | 1513 | ||
1492 | while (sd) { | 1514 | while (sd) { |
1493 | int load_idx = sd->forkexec_idx; | 1515 | int load_idx = sd->forkexec_idx; |
@@ -1528,8 +1550,6 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag | |||
1528 | /* while loop will break here if sd == NULL */ | 1550 | /* while loop will break here if sd == NULL */ |
1529 | } | 1551 | } |
1530 | 1552 | ||
1531 | out: | ||
1532 | rcu_read_unlock(); | ||
1533 | return new_cpu; | 1553 | return new_cpu; |
1534 | } | 1554 | } |
1535 | #endif /* CONFIG_SMP */ | 1555 | #endif /* CONFIG_SMP */ |
@@ -1651,12 +1671,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |||
1651 | int sync = wake_flags & WF_SYNC; | 1671 | int sync = wake_flags & WF_SYNC; |
1652 | int scale = cfs_rq->nr_running >= sched_nr_latency; | 1672 | int scale = cfs_rq->nr_running >= sched_nr_latency; |
1653 | 1673 | ||
1654 | update_curr(cfs_rq); | 1674 | if (unlikely(rt_prio(p->prio))) |
1655 | 1675 | goto preempt; | |
1656 | if (unlikely(rt_prio(p->prio))) { | ||
1657 | resched_task(curr); | ||
1658 | return; | ||
1659 | } | ||
1660 | 1676 | ||
1661 | if (unlikely(p->sched_class != &fair_sched_class)) | 1677 | if (unlikely(p->sched_class != &fair_sched_class)) |
1662 | return; | 1678 | return; |
@@ -1682,50 +1698,44 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |||
1682 | return; | 1698 | return; |
1683 | 1699 | ||
1684 | /* Idle tasks are by definition preempted by everybody. */ | 1700 | /* Idle tasks are by definition preempted by everybody. */ |
1685 | if (unlikely(curr->policy == SCHED_IDLE)) { | 1701 | if (unlikely(curr->policy == SCHED_IDLE)) |
1686 | resched_task(curr); | 1702 | goto preempt; |
1687 | return; | ||
1688 | } | ||
1689 | 1703 | ||
1690 | if ((sched_feat(WAKEUP_SYNC) && sync) || | 1704 | if (sched_feat(WAKEUP_SYNC) && sync) |
1691 | (sched_feat(WAKEUP_OVERLAP) && | 1705 | goto preempt; |
1692 | (se->avg_overlap < sysctl_sched_migration_cost && | ||
1693 | pse->avg_overlap < sysctl_sched_migration_cost))) { | ||
1694 | resched_task(curr); | ||
1695 | return; | ||
1696 | } | ||
1697 | 1706 | ||
1698 | if (sched_feat(WAKEUP_RUNNING)) { | 1707 | if (sched_feat(WAKEUP_OVERLAP) && |
1699 | if (pse->avg_running < se->avg_running) { | 1708 | se->avg_overlap < sysctl_sched_migration_cost && |
1700 | set_next_buddy(pse); | 1709 | pse->avg_overlap < sysctl_sched_migration_cost) |
1701 | resched_task(curr); | 1710 | goto preempt; |
1702 | return; | ||
1703 | } | ||
1704 | } | ||
1705 | 1711 | ||
1706 | if (!sched_feat(WAKEUP_PREEMPT)) | 1712 | if (!sched_feat(WAKEUP_PREEMPT)) |
1707 | return; | 1713 | return; |
1708 | 1714 | ||
1715 | update_curr(cfs_rq); | ||
1709 | find_matching_se(&se, &pse); | 1716 | find_matching_se(&se, &pse); |
1710 | |||
1711 | BUG_ON(!pse); | 1717 | BUG_ON(!pse); |
1718 | if (wakeup_preempt_entity(se, pse) == 1) | ||
1719 | goto preempt; | ||
1712 | 1720 | ||
1713 | if (wakeup_preempt_entity(se, pse) == 1) { | 1721 | return; |
1714 | resched_task(curr); | 1722 | |
1715 | /* | 1723 | preempt: |
1716 | * Only set the backward buddy when the current task is still | 1724 | resched_task(curr); |
1717 | * on the rq. This can happen when a wakeup gets interleaved | 1725 | /* |
1718 | * with schedule on the ->pre_schedule() or idle_balance() | 1726 | * Only set the backward buddy when the current task is still |
1719 | * point, either of which can * drop the rq lock. | 1727 | * on the rq. This can happen when a wakeup gets interleaved |
1720 | * | 1728 | * with schedule on the ->pre_schedule() or idle_balance() |
1721 | * Also, during early boot the idle thread is in the fair class, | 1729 | * point, either of which can * drop the rq lock. |
1722 | * for obvious reasons its a bad idea to schedule back to it. | 1730 | * |
1723 | */ | 1731 | * Also, during early boot the idle thread is in the fair class, |
1724 | if (unlikely(!se->on_rq || curr == rq->idle)) | 1732 | * for obvious reasons its a bad idea to schedule back to it. |
1725 | return; | 1733 | */ |
1726 | if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se)) | 1734 | if (unlikely(!se->on_rq || curr == rq->idle)) |
1727 | set_last_buddy(se); | 1735 | return; |
1728 | } | 1736 | |
1737 | if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se)) | ||
1738 | set_last_buddy(se); | ||
1729 | } | 1739 | } |
1730 | 1740 | ||
1731 | static struct task_struct *pick_next_task_fair(struct rq *rq) | 1741 | static struct task_struct *pick_next_task_fair(struct rq *rq) |
@@ -1905,6 +1915,17 @@ move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
1905 | 1915 | ||
1906 | return 0; | 1916 | return 0; |
1907 | } | 1917 | } |
1918 | |||
1919 | static void rq_online_fair(struct rq *rq) | ||
1920 | { | ||
1921 | update_sysctl(); | ||
1922 | } | ||
1923 | |||
1924 | static void rq_offline_fair(struct rq *rq) | ||
1925 | { | ||
1926 | update_sysctl(); | ||
1927 | } | ||
1928 | |||
1908 | #endif /* CONFIG_SMP */ | 1929 | #endif /* CONFIG_SMP */ |
1909 | 1930 | ||
1910 | /* | 1931 | /* |
@@ -1922,28 +1943,30 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) | |||
1922 | } | 1943 | } |
1923 | 1944 | ||
1924 | /* | 1945 | /* |
1925 | * Share the fairness runtime between parent and child, thus the | 1946 | * called on fork with the child task as argument from the parent's context |
1926 | * total amount of pressure for CPU stays equal - new tasks | 1947 | * - child not yet on the tasklist |
1927 | * get a chance to run but frequent forkers are not allowed to | 1948 | * - preemption disabled |
1928 | * monopolize the CPU. Note: the parent runqueue is locked, | ||
1929 | * the child is not running yet. | ||
1930 | */ | 1949 | */ |
1931 | static void task_new_fair(struct rq *rq, struct task_struct *p) | 1950 | static void task_fork_fair(struct task_struct *p) |
1932 | { | 1951 | { |
1933 | struct cfs_rq *cfs_rq = task_cfs_rq(p); | 1952 | struct cfs_rq *cfs_rq = task_cfs_rq(current); |
1934 | struct sched_entity *se = &p->se, *curr = cfs_rq->curr; | 1953 | struct sched_entity *se = &p->se, *curr = cfs_rq->curr; |
1935 | int this_cpu = smp_processor_id(); | 1954 | int this_cpu = smp_processor_id(); |
1955 | struct rq *rq = this_rq(); | ||
1956 | unsigned long flags; | ||
1957 | |||
1958 | spin_lock_irqsave(&rq->lock, flags); | ||
1936 | 1959 | ||
1937 | sched_info_queued(p); | 1960 | if (unlikely(task_cpu(p) != this_cpu)) |
1961 | __set_task_cpu(p, this_cpu); | ||
1938 | 1962 | ||
1939 | update_curr(cfs_rq); | 1963 | update_curr(cfs_rq); |
1964 | |||
1940 | if (curr) | 1965 | if (curr) |
1941 | se->vruntime = curr->vruntime; | 1966 | se->vruntime = curr->vruntime; |
1942 | place_entity(cfs_rq, se, 1); | 1967 | place_entity(cfs_rq, se, 1); |
1943 | 1968 | ||
1944 | /* 'curr' will be NULL if the child belongs to a different group */ | 1969 | if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) { |
1945 | if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) && | ||
1946 | curr && entity_before(curr, se)) { | ||
1947 | /* | 1970 | /* |
1948 | * Upon rescheduling, sched_class::put_prev_task() will place | 1971 | * Upon rescheduling, sched_class::put_prev_task() will place |
1949 | * 'current' within the tree based on its new key value. | 1972 | * 'current' within the tree based on its new key value. |
@@ -1952,7 +1975,7 @@ static void task_new_fair(struct rq *rq, struct task_struct *p) | |||
1952 | resched_task(rq->curr); | 1975 | resched_task(rq->curr); |
1953 | } | 1976 | } |
1954 | 1977 | ||
1955 | enqueue_task_fair(rq, p, 0); | 1978 | spin_unlock_irqrestore(&rq->lock, flags); |
1956 | } | 1979 | } |
1957 | 1980 | ||
1958 | /* | 1981 | /* |
@@ -2014,21 +2037,17 @@ static void moved_group_fair(struct task_struct *p) | |||
2014 | } | 2037 | } |
2015 | #endif | 2038 | #endif |
2016 | 2039 | ||
2017 | unsigned int get_rr_interval_fair(struct task_struct *task) | 2040 | unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task) |
2018 | { | 2041 | { |
2019 | struct sched_entity *se = &task->se; | 2042 | struct sched_entity *se = &task->se; |
2020 | unsigned long flags; | ||
2021 | struct rq *rq; | ||
2022 | unsigned int rr_interval = 0; | 2043 | unsigned int rr_interval = 0; |
2023 | 2044 | ||
2024 | /* | 2045 | /* |
2025 | * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise | 2046 | * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise |
2026 | * idle runqueue: | 2047 | * idle runqueue: |
2027 | */ | 2048 | */ |
2028 | rq = task_rq_lock(task, &flags); | ||
2029 | if (rq->cfs.load.weight) | 2049 | if (rq->cfs.load.weight) |
2030 | rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se)); | 2050 | rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se)); |
2031 | task_rq_unlock(rq, &flags); | ||
2032 | 2051 | ||
2033 | return rr_interval; | 2052 | return rr_interval; |
2034 | } | 2053 | } |
@@ -2052,11 +2071,13 @@ static const struct sched_class fair_sched_class = { | |||
2052 | 2071 | ||
2053 | .load_balance = load_balance_fair, | 2072 | .load_balance = load_balance_fair, |
2054 | .move_one_task = move_one_task_fair, | 2073 | .move_one_task = move_one_task_fair, |
2074 | .rq_online = rq_online_fair, | ||
2075 | .rq_offline = rq_offline_fair, | ||
2055 | #endif | 2076 | #endif |
2056 | 2077 | ||
2057 | .set_curr_task = set_curr_task_fair, | 2078 | .set_curr_task = set_curr_task_fair, |
2058 | .task_tick = task_tick_fair, | 2079 | .task_tick = task_tick_fair, |
2059 | .task_new = task_new_fair, | 2080 | .task_fork = task_fork_fair, |
2060 | 2081 | ||
2061 | .prio_changed = prio_changed_fair, | 2082 | .prio_changed = prio_changed_fair, |
2062 | .switched_to = switched_to_fair, | 2083 | .switched_to = switched_to_fair, |
diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 0d94083582c7..d5059fd761d9 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h | |||
@@ -54,11 +54,6 @@ SCHED_FEAT(WAKEUP_SYNC, 0) | |||
54 | SCHED_FEAT(WAKEUP_OVERLAP, 0) | 54 | SCHED_FEAT(WAKEUP_OVERLAP, 0) |
55 | 55 | ||
56 | /* | 56 | /* |
57 | * Wakeup preemption towards tasks that run short | ||
58 | */ | ||
59 | SCHED_FEAT(WAKEUP_RUNNING, 0) | ||
60 | |||
61 | /* | ||
62 | * Use the SYNC wakeup hint, pipes and the likes use this to indicate | 57 | * Use the SYNC wakeup hint, pipes and the likes use this to indicate |
63 | * the remote end is likely to consume the data we just wrote, and | 58 | * the remote end is likely to consume the data we just wrote, and |
64 | * therefore has cache benefit from being placed on the same cpu, see | 59 | * therefore has cache benefit from being placed on the same cpu, see |
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index b133a28fcde3..33d5384a73a8 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c | |||
@@ -97,7 +97,7 @@ static void prio_changed_idle(struct rq *rq, struct task_struct *p, | |||
97 | check_preempt_curr(rq, p, 0); | 97 | check_preempt_curr(rq, p, 0); |
98 | } | 98 | } |
99 | 99 | ||
100 | unsigned int get_rr_interval_idle(struct task_struct *task) | 100 | unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task) |
101 | { | 101 | { |
102 | return 0; | 102 | return 0; |
103 | } | 103 | } |
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 5c5fef378415..aecbd9c6b20c 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -1721,7 +1721,7 @@ static void set_curr_task_rt(struct rq *rq) | |||
1721 | dequeue_pushable_task(rq, p); | 1721 | dequeue_pushable_task(rq, p); |
1722 | } | 1722 | } |
1723 | 1723 | ||
1724 | unsigned int get_rr_interval_rt(struct task_struct *task) | 1724 | unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task) |
1725 | { | 1725 | { |
1726 | /* | 1726 | /* |
1727 | * Time slice is 0 for SCHED_FIFO tasks | 1727 | * Time slice is 0 for SCHED_FIFO tasks |
diff --git a/kernel/sys.c b/kernel/sys.c index 9968c5fb55b9..585d6cd10040 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -8,7 +8,6 @@ | |||
8 | #include <linux/mm.h> | 8 | #include <linux/mm.h> |
9 | #include <linux/utsname.h> | 9 | #include <linux/utsname.h> |
10 | #include <linux/mman.h> | 10 | #include <linux/mman.h> |
11 | #include <linux/smp_lock.h> | ||
12 | #include <linux/notifier.h> | 11 | #include <linux/notifier.h> |
13 | #include <linux/reboot.h> | 12 | #include <linux/reboot.h> |
14 | #include <linux/prctl.h> | 13 | #include <linux/prctl.h> |
@@ -349,6 +348,9 @@ void kernel_power_off(void) | |||
349 | machine_power_off(); | 348 | machine_power_off(); |
350 | } | 349 | } |
351 | EXPORT_SYMBOL_GPL(kernel_power_off); | 350 | EXPORT_SYMBOL_GPL(kernel_power_off); |
351 | |||
352 | static DEFINE_MUTEX(reboot_mutex); | ||
353 | |||
352 | /* | 354 | /* |
353 | * Reboot system call: for obvious reasons only root may call it, | 355 | * Reboot system call: for obvious reasons only root may call it, |
354 | * and even root needs to set up some magic numbers in the registers | 356 | * and even root needs to set up some magic numbers in the registers |
@@ -381,7 +383,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, | |||
381 | if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) | 383 | if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) |
382 | cmd = LINUX_REBOOT_CMD_HALT; | 384 | cmd = LINUX_REBOOT_CMD_HALT; |
383 | 385 | ||
384 | lock_kernel(); | 386 | mutex_lock(&reboot_mutex); |
385 | switch (cmd) { | 387 | switch (cmd) { |
386 | case LINUX_REBOOT_CMD_RESTART: | 388 | case LINUX_REBOOT_CMD_RESTART: |
387 | kernel_restart(NULL); | 389 | kernel_restart(NULL); |
@@ -397,20 +399,18 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, | |||
397 | 399 | ||
398 | case LINUX_REBOOT_CMD_HALT: | 400 | case LINUX_REBOOT_CMD_HALT: |
399 | kernel_halt(); | 401 | kernel_halt(); |
400 | unlock_kernel(); | ||
401 | do_exit(0); | 402 | do_exit(0); |
402 | panic("cannot halt"); | 403 | panic("cannot halt"); |
403 | 404 | ||
404 | case LINUX_REBOOT_CMD_POWER_OFF: | 405 | case LINUX_REBOOT_CMD_POWER_OFF: |
405 | kernel_power_off(); | 406 | kernel_power_off(); |
406 | unlock_kernel(); | ||
407 | do_exit(0); | 407 | do_exit(0); |
408 | break; | 408 | break; |
409 | 409 | ||
410 | case LINUX_REBOOT_CMD_RESTART2: | 410 | case LINUX_REBOOT_CMD_RESTART2: |
411 | if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) { | 411 | if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) { |
412 | unlock_kernel(); | 412 | ret = -EFAULT; |
413 | return -EFAULT; | 413 | break; |
414 | } | 414 | } |
415 | buffer[sizeof(buffer) - 1] = '\0'; | 415 | buffer[sizeof(buffer) - 1] = '\0'; |
416 | 416 | ||
@@ -433,7 +433,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, | |||
433 | ret = -EINVAL; | 433 | ret = -EINVAL; |
434 | break; | 434 | break; |
435 | } | 435 | } |
436 | unlock_kernel(); | 436 | mutex_unlock(&reboot_mutex); |
437 | return ret; | 437 | return ret; |
438 | } | 438 | } |
439 | 439 | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9327a26765c5..554ac4894f0f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -244,6 +244,10 @@ static int min_sched_granularity_ns = 100000; /* 100 usecs */ | |||
244 | static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ | 244 | static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ |
245 | static int min_wakeup_granularity_ns; /* 0 usecs */ | 245 | static int min_wakeup_granularity_ns; /* 0 usecs */ |
246 | static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ | 246 | static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ |
247 | static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; | ||
248 | static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1; | ||
249 | static int min_sched_shares_ratelimit = 100000; /* 100 usec */ | ||
250 | static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */ | ||
247 | #endif | 251 | #endif |
248 | 252 | ||
249 | static struct ctl_table kern_table[] = { | 253 | static struct ctl_table kern_table[] = { |
@@ -260,7 +264,7 @@ static struct ctl_table kern_table[] = { | |||
260 | .data = &sysctl_sched_min_granularity, | 264 | .data = &sysctl_sched_min_granularity, |
261 | .maxlen = sizeof(unsigned int), | 265 | .maxlen = sizeof(unsigned int), |
262 | .mode = 0644, | 266 | .mode = 0644, |
263 | .proc_handler = sched_nr_latency_handler, | 267 | .proc_handler = sched_proc_update_handler, |
264 | .extra1 = &min_sched_granularity_ns, | 268 | .extra1 = &min_sched_granularity_ns, |
265 | .extra2 = &max_sched_granularity_ns, | 269 | .extra2 = &max_sched_granularity_ns, |
266 | }, | 270 | }, |
@@ -269,7 +273,7 @@ static struct ctl_table kern_table[] = { | |||
269 | .data = &sysctl_sched_latency, | 273 | .data = &sysctl_sched_latency, |
270 | .maxlen = sizeof(unsigned int), | 274 | .maxlen = sizeof(unsigned int), |
271 | .mode = 0644, | 275 | .mode = 0644, |
272 | .proc_handler = sched_nr_latency_handler, | 276 | .proc_handler = sched_proc_update_handler, |
273 | .extra1 = &min_sched_granularity_ns, | 277 | .extra1 = &min_sched_granularity_ns, |
274 | .extra2 = &max_sched_granularity_ns, | 278 | .extra2 = &max_sched_granularity_ns, |
275 | }, | 279 | }, |
@@ -278,7 +282,7 @@ static struct ctl_table kern_table[] = { | |||
278 | .data = &sysctl_sched_wakeup_granularity, | 282 | .data = &sysctl_sched_wakeup_granularity, |
279 | .maxlen = sizeof(unsigned int), | 283 | .maxlen = sizeof(unsigned int), |
280 | .mode = 0644, | 284 | .mode = 0644, |
281 | .proc_handler = proc_dointvec_minmax, | 285 | .proc_handler = sched_proc_update_handler, |
282 | .extra1 = &min_wakeup_granularity_ns, | 286 | .extra1 = &min_wakeup_granularity_ns, |
283 | .extra2 = &max_wakeup_granularity_ns, | 287 | .extra2 = &max_wakeup_granularity_ns, |
284 | }, | 288 | }, |
@@ -287,7 +291,18 @@ static struct ctl_table kern_table[] = { | |||
287 | .data = &sysctl_sched_shares_ratelimit, | 291 | .data = &sysctl_sched_shares_ratelimit, |
288 | .maxlen = sizeof(unsigned int), | 292 | .maxlen = sizeof(unsigned int), |
289 | .mode = 0644, | 293 | .mode = 0644, |
290 | .proc_handler = proc_dointvec, | 294 | .proc_handler = sched_proc_update_handler, |
295 | .extra1 = &min_sched_shares_ratelimit, | ||
296 | .extra2 = &max_sched_shares_ratelimit, | ||
297 | }, | ||
298 | { | ||
299 | .procname = "sched_tunable_scaling", | ||
300 | .data = &sysctl_sched_tunable_scaling, | ||
301 | .maxlen = sizeof(enum sched_tunable_scaling), | ||
302 | .mode = 0644, | ||
303 | .proc_handler = sched_proc_update_handler, | ||
304 | .extra1 = &min_sched_tunable_scaling, | ||
305 | .extra2 = &max_sched_tunable_scaling, | ||
291 | }, | 306 | }, |
292 | { | 307 | { |
293 | .procname = "sched_shares_thresh", | 308 | .procname = "sched_shares_thresh", |
@@ -298,13 +313,6 @@ static struct ctl_table kern_table[] = { | |||
298 | .extra1 = &zero, | 313 | .extra1 = &zero, |
299 | }, | 314 | }, |
300 | { | 315 | { |
301 | .procname = "sched_features", | ||
302 | .data = &sysctl_sched_features, | ||
303 | .maxlen = sizeof(unsigned int), | ||
304 | .mode = 0644, | ||
305 | .proc_handler = proc_dointvec, | ||
306 | }, | ||
307 | { | ||
308 | .procname = "sched_migration_cost", | 316 | .procname = "sched_migration_cost", |
309 | .data = &sysctl_sched_migration_cost, | 317 | .data = &sysctl_sched_migration_cost, |
310 | .maxlen = sizeof(unsigned int), | 318 | .maxlen = sizeof(unsigned int), |
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index d422c7b2236b..e85c23404d34 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
@@ -677,7 +677,7 @@ sysfs_show_current_clocksources(struct sys_device *dev, | |||
677 | * @count: length of buffer | 677 | * @count: length of buffer |
678 | * | 678 | * |
679 | * Takes input from sysfs interface for manually overriding the default | 679 | * Takes input from sysfs interface for manually overriding the default |
680 | * clocksource selction. | 680 | * clocksource selection. |
681 | */ | 681 | */ |
682 | static ssize_t sysfs_override_clocksource(struct sys_device *dev, | 682 | static ssize_t sysfs_override_clocksource(struct sys_device *dev, |
683 | struct sysdev_attribute *attr, | 683 | struct sysdev_attribute *attr, |
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 665c76edbf17..9d80db4747d4 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c | |||
@@ -150,6 +150,9 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now) | |||
150 | P_ns(expires_next); | 150 | P_ns(expires_next); |
151 | P(hres_active); | 151 | P(hres_active); |
152 | P(nr_events); | 152 | P(nr_events); |
153 | P(nr_retries); | ||
154 | P(nr_hangs); | ||
155 | P_ns(max_hang_time); | ||
153 | #endif | 156 | #endif |
154 | #undef P | 157 | #undef P |
155 | #undef P_ns | 158 | #undef P_ns |
@@ -254,7 +257,7 @@ static int timer_list_show(struct seq_file *m, void *v) | |||
254 | u64 now = ktime_to_ns(ktime_get()); | 257 | u64 now = ktime_to_ns(ktime_get()); |
255 | int cpu; | 258 | int cpu; |
256 | 259 | ||
257 | SEQ_printf(m, "Timer List Version: v0.4\n"); | 260 | SEQ_printf(m, "Timer List Version: v0.5\n"); |
258 | SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); | 261 | SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); |
259 | SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); | 262 | SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); |
260 | 263 | ||
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 6ed223447a3f..7ecab06547a5 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
@@ -606,23 +606,22 @@ static int create_trace_probe(int argc, char **argv) | |||
606 | */ | 606 | */ |
607 | struct trace_probe *tp; | 607 | struct trace_probe *tp; |
608 | int i, ret = 0; | 608 | int i, ret = 0; |
609 | int is_return = 0; | 609 | int is_return = 0, is_delete = 0; |
610 | char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL; | 610 | char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL; |
611 | unsigned long offset = 0; | 611 | unsigned long offset = 0; |
612 | void *addr = NULL; | 612 | void *addr = NULL; |
613 | char buf[MAX_EVENT_NAME_LEN]; | 613 | char buf[MAX_EVENT_NAME_LEN]; |
614 | 614 | ||
615 | if (argc < 2) { | 615 | /* argc must be >= 1 */ |
616 | pr_info("Probe point is not specified.\n"); | ||
617 | return -EINVAL; | ||
618 | } | ||
619 | |||
620 | if (argv[0][0] == 'p') | 616 | if (argv[0][0] == 'p') |
621 | is_return = 0; | 617 | is_return = 0; |
622 | else if (argv[0][0] == 'r') | 618 | else if (argv[0][0] == 'r') |
623 | is_return = 1; | 619 | is_return = 1; |
620 | else if (argv[0][0] == '-') | ||
621 | is_delete = 1; | ||
624 | else { | 622 | else { |
625 | pr_info("Probe definition must be started with 'p' or 'r'.\n"); | 623 | pr_info("Probe definition must be started with 'p', 'r' or" |
624 | " '-'.\n"); | ||
626 | return -EINVAL; | 625 | return -EINVAL; |
627 | } | 626 | } |
628 | 627 | ||
@@ -642,7 +641,29 @@ static int create_trace_probe(int argc, char **argv) | |||
642 | return -EINVAL; | 641 | return -EINVAL; |
643 | } | 642 | } |
644 | } | 643 | } |
644 | if (!group) | ||
645 | group = KPROBE_EVENT_SYSTEM; | ||
645 | 646 | ||
647 | if (is_delete) { | ||
648 | if (!event) { | ||
649 | pr_info("Delete command needs an event name.\n"); | ||
650 | return -EINVAL; | ||
651 | } | ||
652 | tp = find_probe_event(event, group); | ||
653 | if (!tp) { | ||
654 | pr_info("Event %s/%s doesn't exist.\n", group, event); | ||
655 | return -ENOENT; | ||
656 | } | ||
657 | /* delete an event */ | ||
658 | unregister_trace_probe(tp); | ||
659 | free_trace_probe(tp); | ||
660 | return 0; | ||
661 | } | ||
662 | |||
663 | if (argc < 2) { | ||
664 | pr_info("Probe point is not specified.\n"); | ||
665 | return -EINVAL; | ||
666 | } | ||
646 | if (isdigit(argv[1][0])) { | 667 | if (isdigit(argv[1][0])) { |
647 | if (is_return) { | 668 | if (is_return) { |
648 | pr_info("Return probe point must be a symbol.\n"); | 669 | pr_info("Return probe point must be a symbol.\n"); |
@@ -671,8 +692,6 @@ static int create_trace_probe(int argc, char **argv) | |||
671 | argc -= 2; argv += 2; | 692 | argc -= 2; argv += 2; |
672 | 693 | ||
673 | /* setup a probe */ | 694 | /* setup a probe */ |
674 | if (!group) | ||
675 | group = KPROBE_EVENT_SYSTEM; | ||
676 | if (!event) { | 695 | if (!event) { |
677 | /* Make a new event name */ | 696 | /* Make a new event name */ |
678 | if (symbol) | 697 | if (symbol) |
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 48f1c6c248c6..faf37fa4408c 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c | |||
@@ -79,11 +79,12 @@ void ksym_collect_stats(unsigned long hbp_hit_addr) | |||
79 | } | 79 | } |
80 | #endif /* CONFIG_PROFILE_KSYM_TRACER */ | 80 | #endif /* CONFIG_PROFILE_KSYM_TRACER */ |
81 | 81 | ||
82 | void ksym_hbp_handler(struct perf_event *hbp, void *data) | 82 | void ksym_hbp_handler(struct perf_event *hbp, int nmi, |
83 | struct perf_sample_data *data, | ||
84 | struct pt_regs *regs) | ||
83 | { | 85 | { |
84 | struct ring_buffer_event *event; | 86 | struct ring_buffer_event *event; |
85 | struct ksym_trace_entry *entry; | 87 | struct ksym_trace_entry *entry; |
86 | struct pt_regs *regs = data; | ||
87 | struct ring_buffer *buffer; | 88 | struct ring_buffer *buffer; |
88 | int pc; | 89 | int pc; |
89 | 90 | ||
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 67e526b6ae81..dee48658805c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -68,6 +68,116 @@ struct workqueue_struct { | |||
68 | #endif | 68 | #endif |
69 | }; | 69 | }; |
70 | 70 | ||
71 | #ifdef CONFIG_DEBUG_OBJECTS_WORK | ||
72 | |||
73 | static struct debug_obj_descr work_debug_descr; | ||
74 | |||
75 | /* | ||
76 | * fixup_init is called when: | ||
77 | * - an active object is initialized | ||
78 | */ | ||
79 | static int work_fixup_init(void *addr, enum debug_obj_state state) | ||
80 | { | ||
81 | struct work_struct *work = addr; | ||
82 | |||
83 | switch (state) { | ||
84 | case ODEBUG_STATE_ACTIVE: | ||
85 | cancel_work_sync(work); | ||
86 | debug_object_init(work, &work_debug_descr); | ||
87 | return 1; | ||
88 | default: | ||
89 | return 0; | ||
90 | } | ||
91 | } | ||
92 | |||
93 | /* | ||
94 | * fixup_activate is called when: | ||
95 | * - an active object is activated | ||
96 | * - an unknown object is activated (might be a statically initialized object) | ||
97 | */ | ||
98 | static int work_fixup_activate(void *addr, enum debug_obj_state state) | ||
99 | { | ||
100 | struct work_struct *work = addr; | ||
101 | |||
102 | switch (state) { | ||
103 | |||
104 | case ODEBUG_STATE_NOTAVAILABLE: | ||
105 | /* | ||
106 | * This is not really a fixup. The work struct was | ||
107 | * statically initialized. We just make sure that it | ||
108 | * is tracked in the object tracker. | ||
109 | */ | ||
110 | if (test_bit(WORK_STRUCT_STATIC, work_data_bits(work))) { | ||
111 | debug_object_init(work, &work_debug_descr); | ||
112 | debug_object_activate(work, &work_debug_descr); | ||
113 | return 0; | ||
114 | } | ||
115 | WARN_ON_ONCE(1); | ||
116 | return 0; | ||
117 | |||
118 | case ODEBUG_STATE_ACTIVE: | ||
119 | WARN_ON(1); | ||
120 | |||
121 | default: | ||
122 | return 0; | ||
123 | } | ||
124 | } | ||
125 | |||
126 | /* | ||
127 | * fixup_free is called when: | ||
128 | * - an active object is freed | ||
129 | */ | ||
130 | static int work_fixup_free(void *addr, enum debug_obj_state state) | ||
131 | { | ||
132 | struct work_struct *work = addr; | ||
133 | |||
134 | switch (state) { | ||
135 | case ODEBUG_STATE_ACTIVE: | ||
136 | cancel_work_sync(work); | ||
137 | debug_object_free(work, &work_debug_descr); | ||
138 | return 1; | ||
139 | default: | ||
140 | return 0; | ||
141 | } | ||
142 | } | ||
143 | |||
144 | static struct debug_obj_descr work_debug_descr = { | ||
145 | .name = "work_struct", | ||
146 | .fixup_init = work_fixup_init, | ||
147 | .fixup_activate = work_fixup_activate, | ||
148 | .fixup_free = work_fixup_free, | ||
149 | }; | ||
150 | |||
151 | static inline void debug_work_activate(struct work_struct *work) | ||
152 | { | ||
153 | debug_object_activate(work, &work_debug_descr); | ||
154 | } | ||
155 | |||
156 | static inline void debug_work_deactivate(struct work_struct *work) | ||
157 | { | ||
158 | debug_object_deactivate(work, &work_debug_descr); | ||
159 | } | ||
160 | |||
161 | void __init_work(struct work_struct *work, int onstack) | ||
162 | { | ||
163 | if (onstack) | ||
164 | debug_object_init_on_stack(work, &work_debug_descr); | ||
165 | else | ||
166 | debug_object_init(work, &work_debug_descr); | ||
167 | } | ||
168 | EXPORT_SYMBOL_GPL(__init_work); | ||
169 | |||
170 | void destroy_work_on_stack(struct work_struct *work) | ||
171 | { | ||
172 | debug_object_free(work, &work_debug_descr); | ||
173 | } | ||
174 | EXPORT_SYMBOL_GPL(destroy_work_on_stack); | ||
175 | |||
176 | #else | ||
177 | static inline void debug_work_activate(struct work_struct *work) { } | ||
178 | static inline void debug_work_deactivate(struct work_struct *work) { } | ||
179 | #endif | ||
180 | |||
71 | /* Serializes the accesses to the list of workqueues. */ | 181 | /* Serializes the accesses to the list of workqueues. */ |
72 | static DEFINE_SPINLOCK(workqueue_lock); | 182 | static DEFINE_SPINLOCK(workqueue_lock); |
73 | static LIST_HEAD(workqueues); | 183 | static LIST_HEAD(workqueues); |
@@ -145,6 +255,7 @@ static void __queue_work(struct cpu_workqueue_struct *cwq, | |||
145 | { | 255 | { |
146 | unsigned long flags; | 256 | unsigned long flags; |
147 | 257 | ||
258 | debug_work_activate(work); | ||
148 | spin_lock_irqsave(&cwq->lock, flags); | 259 | spin_lock_irqsave(&cwq->lock, flags); |
149 | insert_work(cwq, work, &cwq->worklist); | 260 | insert_work(cwq, work, &cwq->worklist); |
150 | spin_unlock_irqrestore(&cwq->lock, flags); | 261 | spin_unlock_irqrestore(&cwq->lock, flags); |
@@ -280,6 +391,7 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq) | |||
280 | struct lockdep_map lockdep_map = work->lockdep_map; | 391 | struct lockdep_map lockdep_map = work->lockdep_map; |
281 | #endif | 392 | #endif |
282 | trace_workqueue_execution(cwq->thread, work); | 393 | trace_workqueue_execution(cwq->thread, work); |
394 | debug_work_deactivate(work); | ||
283 | cwq->current_work = work; | 395 | cwq->current_work = work; |
284 | list_del_init(cwq->worklist.next); | 396 | list_del_init(cwq->worklist.next); |
285 | spin_unlock_irq(&cwq->lock); | 397 | spin_unlock_irq(&cwq->lock); |
@@ -350,11 +462,18 @@ static void wq_barrier_func(struct work_struct *work) | |||
350 | static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, | 462 | static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, |
351 | struct wq_barrier *barr, struct list_head *head) | 463 | struct wq_barrier *barr, struct list_head *head) |
352 | { | 464 | { |
353 | INIT_WORK(&barr->work, wq_barrier_func); | 465 | /* |
466 | * debugobject calls are safe here even with cwq->lock locked | ||
467 | * as we know for sure that this will not trigger any of the | ||
468 | * checks and call back into the fixup functions where we | ||
469 | * might deadlock. | ||
470 | */ | ||
471 | INIT_WORK_ON_STACK(&barr->work, wq_barrier_func); | ||
354 | __set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work)); | 472 | __set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work)); |
355 | 473 | ||
356 | init_completion(&barr->done); | 474 | init_completion(&barr->done); |
357 | 475 | ||
476 | debug_work_activate(&barr->work); | ||
358 | insert_work(cwq, &barr->work, head); | 477 | insert_work(cwq, &barr->work, head); |
359 | } | 478 | } |
360 | 479 | ||
@@ -372,8 +491,10 @@ static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq) | |||
372 | } | 491 | } |
373 | spin_unlock_irq(&cwq->lock); | 492 | spin_unlock_irq(&cwq->lock); |
374 | 493 | ||
375 | if (active) | 494 | if (active) { |
376 | wait_for_completion(&barr.done); | 495 | wait_for_completion(&barr.done); |
496 | destroy_work_on_stack(&barr.work); | ||
497 | } | ||
377 | 498 | ||
378 | return active; | 499 | return active; |
379 | } | 500 | } |
@@ -451,6 +572,7 @@ out: | |||
451 | return 0; | 572 | return 0; |
452 | 573 | ||
453 | wait_for_completion(&barr.done); | 574 | wait_for_completion(&barr.done); |
575 | destroy_work_on_stack(&barr.work); | ||
454 | return 1; | 576 | return 1; |
455 | } | 577 | } |
456 | EXPORT_SYMBOL_GPL(flush_work); | 578 | EXPORT_SYMBOL_GPL(flush_work); |
@@ -485,6 +607,7 @@ static int try_to_grab_pending(struct work_struct *work) | |||
485 | */ | 607 | */ |
486 | smp_rmb(); | 608 | smp_rmb(); |
487 | if (cwq == get_wq_data(work)) { | 609 | if (cwq == get_wq_data(work)) { |
610 | debug_work_deactivate(work); | ||
488 | list_del_init(&work->entry); | 611 | list_del_init(&work->entry); |
489 | ret = 1; | 612 | ret = 1; |
490 | } | 613 | } |
@@ -507,8 +630,10 @@ static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq, | |||
507 | } | 630 | } |
508 | spin_unlock_irq(&cwq->lock); | 631 | spin_unlock_irq(&cwq->lock); |
509 | 632 | ||
510 | if (unlikely(running)) | 633 | if (unlikely(running)) { |
511 | wait_for_completion(&barr.done); | 634 | wait_for_completion(&barr.done); |
635 | destroy_work_on_stack(&barr.work); | ||
636 | } | ||
512 | } | 637 | } |
513 | 638 | ||
514 | static void wait_on_work(struct work_struct *work) | 639 | static void wait_on_work(struct work_struct *work) |