diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-11-21 18:44:54 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-11-21 18:44:54 -0500 |
commit | 8b2ed21e846c63d8f1bdee0d8df0645721a604a1 (patch) | |
tree | 459906e25111254256e4873043da98a6e4055564 | |
parent | 13f5004c94785af107dd702d9fbbe160f1004064 (diff) | |
parent | 6e998916dfe327e785e7c2447959b2c1a3ea4930 (diff) |
Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Ingo Molnar:
"Misc fixes: two NUMA fixes, two cputime fixes and an RCU/lockdep fix"
* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
sched/cputime: Fix clock_nanosleep()/clock_gettime() inconsistency
sched/cputime: Fix cpu_timer_sample_group() double accounting
sched/numa: Avoid selecting oneself as swap target
sched/numa: Fix out of bounds read in sched_init_numa()
sched: Remove lockdep check in sched_move_task()
-rw-r--r-- | include/linux/kernel_stat.h | 5 | ||||
-rw-r--r-- | kernel/sched/core.c | 63 | ||||
-rw-r--r-- | kernel/sched/deadline.c | 2 | ||||
-rw-r--r-- | kernel/sched/fair.c | 14 | ||||
-rw-r--r-- | kernel/sched/rt.c | 2 | ||||
-rw-r--r-- | kernel/sched/sched.h | 2 | ||||
-rw-r--r-- | kernel/time/posix-cpu-timers.c | 2 |
7 files changed, 42 insertions, 48 deletions
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 8422b4ed6882..b9376cd5a187 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h | |||
@@ -77,11 +77,6 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu) | |||
77 | return kstat_cpu(cpu).irqs_sum; | 77 | return kstat_cpu(cpu).irqs_sum; |
78 | } | 78 | } |
79 | 79 | ||
80 | /* | ||
81 | * Lock/unlock the current runqueue - to extract task statistics: | ||
82 | */ | ||
83 | extern unsigned long long task_delta_exec(struct task_struct *); | ||
84 | |||
85 | extern void account_user_time(struct task_struct *, cputime_t, cputime_t); | 80 | extern void account_user_time(struct task_struct *, cputime_t, cputime_t); |
86 | extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t); | 81 | extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t); |
87 | extern void account_steal_time(cputime_t); | 82 | extern void account_steal_time(cputime_t); |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 240157c13ddc..24beb9bb4c3e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -2475,44 +2475,6 @@ EXPORT_PER_CPU_SYMBOL(kstat); | |||
2475 | EXPORT_PER_CPU_SYMBOL(kernel_cpustat); | 2475 | EXPORT_PER_CPU_SYMBOL(kernel_cpustat); |
2476 | 2476 | ||
2477 | /* | 2477 | /* |
2478 | * Return any ns on the sched_clock that have not yet been accounted in | ||
2479 | * @p in case that task is currently running. | ||
2480 | * | ||
2481 | * Called with task_rq_lock() held on @rq. | ||
2482 | */ | ||
2483 | static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) | ||
2484 | { | ||
2485 | u64 ns = 0; | ||
2486 | |||
2487 | /* | ||
2488 | * Must be ->curr _and_ ->on_rq. If dequeued, we would | ||
2489 | * project cycles that may never be accounted to this | ||
2490 | * thread, breaking clock_gettime(). | ||
2491 | */ | ||
2492 | if (task_current(rq, p) && task_on_rq_queued(p)) { | ||
2493 | update_rq_clock(rq); | ||
2494 | ns = rq_clock_task(rq) - p->se.exec_start; | ||
2495 | if ((s64)ns < 0) | ||
2496 | ns = 0; | ||
2497 | } | ||
2498 | |||
2499 | return ns; | ||
2500 | } | ||
2501 | |||
2502 | unsigned long long task_delta_exec(struct task_struct *p) | ||
2503 | { | ||
2504 | unsigned long flags; | ||
2505 | struct rq *rq; | ||
2506 | u64 ns = 0; | ||
2507 | |||
2508 | rq = task_rq_lock(p, &flags); | ||
2509 | ns = do_task_delta_exec(p, rq); | ||
2510 | task_rq_unlock(rq, p, &flags); | ||
2511 | |||
2512 | return ns; | ||
2513 | } | ||
2514 | |||
2515 | /* | ||
2516 | * Return accounted runtime for the task. | 2478 | * Return accounted runtime for the task. |
2517 | * In case the task is currently running, return the runtime plus current's | 2479 | * In case the task is currently running, return the runtime plus current's |
2518 | * pending runtime that have not been accounted yet. | 2480 | * pending runtime that have not been accounted yet. |
@@ -2521,7 +2483,7 @@ unsigned long long task_sched_runtime(struct task_struct *p) | |||
2521 | { | 2483 | { |
2522 | unsigned long flags; | 2484 | unsigned long flags; |
2523 | struct rq *rq; | 2485 | struct rq *rq; |
2524 | u64 ns = 0; | 2486 | u64 ns; |
2525 | 2487 | ||
2526 | #if defined(CONFIG_64BIT) && defined(CONFIG_SMP) | 2488 | #if defined(CONFIG_64BIT) && defined(CONFIG_SMP) |
2527 | /* | 2489 | /* |
@@ -2540,7 +2502,16 @@ unsigned long long task_sched_runtime(struct task_struct *p) | |||
2540 | #endif | 2502 | #endif |
2541 | 2503 | ||
2542 | rq = task_rq_lock(p, &flags); | 2504 | rq = task_rq_lock(p, &flags); |
2543 | ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq); | 2505 | /* |
2506 | * Must be ->curr _and_ ->on_rq. If dequeued, we would | ||
2507 | * project cycles that may never be accounted to this | ||
2508 | * thread, breaking clock_gettime(). | ||
2509 | */ | ||
2510 | if (task_current(rq, p) && task_on_rq_queued(p)) { | ||
2511 | update_rq_clock(rq); | ||
2512 | p->sched_class->update_curr(rq); | ||
2513 | } | ||
2514 | ns = p->se.sum_exec_runtime; | ||
2544 | task_rq_unlock(rq, p, &flags); | 2515 | task_rq_unlock(rq, p, &flags); |
2545 | 2516 | ||
2546 | return ns; | 2517 | return ns; |
@@ -6368,6 +6339,10 @@ static void sched_init_numa(void) | |||
6368 | if (!sched_debug()) | 6339 | if (!sched_debug()) |
6369 | break; | 6340 | break; |
6370 | } | 6341 | } |
6342 | |||
6343 | if (!level) | ||
6344 | return; | ||
6345 | |||
6371 | /* | 6346 | /* |
6372 | * 'level' contains the number of unique distances, excluding the | 6347 | * 'level' contains the number of unique distances, excluding the |
6373 | * identity distance node_distance(i,i). | 6348 | * identity distance node_distance(i,i). |
@@ -7444,8 +7419,12 @@ void sched_move_task(struct task_struct *tsk) | |||
7444 | if (unlikely(running)) | 7419 | if (unlikely(running)) |
7445 | put_prev_task(rq, tsk); | 7420 | put_prev_task(rq, tsk); |
7446 | 7421 | ||
7447 | tg = container_of(task_css_check(tsk, cpu_cgrp_id, | 7422 | /* |
7448 | lockdep_is_held(&tsk->sighand->siglock)), | 7423 | * All callers are synchronized by task_rq_lock(); we do not use RCU, |
7424 | * which would be pointless here. Thus, we pass "true" to task_css_check() | ||
7425 | * to prevent lockdep warnings. | ||
7426 | */ | ||
7427 | tg = container_of(task_css_check(tsk, cpu_cgrp_id, true), | ||
7449 | struct task_group, css); | 7428 | struct task_group, css); |
7450 | tg = autogroup_task_group(tsk, tg); | 7429 | tg = autogroup_task_group(tsk, tg); |
7451 | tsk->sched_task_group = tg; | 7430 | tsk->sched_task_group = tg; |
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 5285332392d5..28fa9d9e9201 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c | |||
@@ -1701,4 +1701,6 @@ const struct sched_class dl_sched_class = { | |||
1701 | .prio_changed = prio_changed_dl, | 1701 | .prio_changed = prio_changed_dl, |
1702 | .switched_from = switched_from_dl, | 1702 | .switched_from = switched_from_dl, |
1703 | .switched_to = switched_to_dl, | 1703 | .switched_to = switched_to_dl, |
1704 | |||
1705 | .update_curr = update_curr_dl, | ||
1704 | }; | 1706 | }; |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 34baa60f8a7b..ef2b104b254c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
@@ -726,6 +726,11 @@ static void update_curr(struct cfs_rq *cfs_rq) | |||
726 | account_cfs_rq_runtime(cfs_rq, delta_exec); | 726 | account_cfs_rq_runtime(cfs_rq, delta_exec); |
727 | } | 727 | } |
728 | 728 | ||
729 | static void update_curr_fair(struct rq *rq) | ||
730 | { | ||
731 | update_curr(cfs_rq_of(&rq->curr->se)); | ||
732 | } | ||
733 | |||
729 | static inline void | 734 | static inline void |
730 | update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se) | 735 | update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se) |
731 | { | 736 | { |
@@ -1180,6 +1185,13 @@ static void task_numa_compare(struct task_numa_env *env, | |||
1180 | raw_spin_unlock_irq(&dst_rq->lock); | 1185 | raw_spin_unlock_irq(&dst_rq->lock); |
1181 | 1186 | ||
1182 | /* | 1187 | /* |
1188 | * Because we have preemption enabled we can get migrated around and | ||
1189 | * end up trying to select ourselves (current == env->p) as a swap candidate. | ||
1190 | */ | ||
1191 | if (cur == env->p) | ||
1192 | goto unlock; | ||
1193 | |||
1194 | /* | ||
1183 | * "imp" is the fault differential for the source task between the | 1195 | * "imp" is the fault differential for the source task between the |
1184 | * source and destination node. Calculate the total differential for | 1196 | * source and destination node. Calculate the total differential for |
1185 | * the source task and potential destination task. The more negative | 1197 | * the source task and potential destination task. The more negative |
@@ -7949,6 +7961,8 @@ const struct sched_class fair_sched_class = { | |||
7949 | 7961 | ||
7950 | .get_rr_interval = get_rr_interval_fair, | 7962 | .get_rr_interval = get_rr_interval_fair, |
7951 | 7963 | ||
7964 | .update_curr = update_curr_fair, | ||
7965 | |||
7952 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7966 | #ifdef CONFIG_FAIR_GROUP_SCHED |
7953 | .task_move_group = task_move_group_fair, | 7967 | .task_move_group = task_move_group_fair, |
7954 | #endif | 7968 | #endif |
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index d024e6ce30ba..20bca398084a 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c | |||
@@ -2128,6 +2128,8 @@ const struct sched_class rt_sched_class = { | |||
2128 | 2128 | ||
2129 | .prio_changed = prio_changed_rt, | 2129 | .prio_changed = prio_changed_rt, |
2130 | .switched_to = switched_to_rt, | 2130 | .switched_to = switched_to_rt, |
2131 | |||
2132 | .update_curr = update_curr_rt, | ||
2131 | }; | 2133 | }; |
2132 | 2134 | ||
2133 | #ifdef CONFIG_SCHED_DEBUG | 2135 | #ifdef CONFIG_SCHED_DEBUG |
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 24156c8434d1..2df8ef067cc5 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
@@ -1135,6 +1135,8 @@ struct sched_class { | |||
1135 | unsigned int (*get_rr_interval) (struct rq *rq, | 1135 | unsigned int (*get_rr_interval) (struct rq *rq, |
1136 | struct task_struct *task); | 1136 | struct task_struct *task); |
1137 | 1137 | ||
1138 | void (*update_curr) (struct rq *rq); | ||
1139 | |||
1138 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1140 | #ifdef CONFIG_FAIR_GROUP_SCHED |
1139 | void (*task_move_group) (struct task_struct *p, int on_rq); | 1141 | void (*task_move_group) (struct task_struct *p, int on_rq); |
1140 | #endif | 1142 | #endif |
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 492b986195d5..a16b67859e2a 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c | |||
@@ -553,7 +553,7 @@ static int cpu_timer_sample_group(const clockid_t which_clock, | |||
553 | *sample = cputime_to_expires(cputime.utime); | 553 | *sample = cputime_to_expires(cputime.utime); |
554 | break; | 554 | break; |
555 | case CPUCLOCK_SCHED: | 555 | case CPUCLOCK_SCHED: |
556 | *sample = cputime.sum_exec_runtime + task_delta_exec(p); | 556 | *sample = cputime.sum_exec_runtime; |
557 | break; | 557 | break; |
558 | } | 558 | } |
559 | return 0; | 559 | return 0; |