Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--  kernel/sched/core.c  118
1 file changed, 74 insertions(+), 44 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 44999505e1bf..89e7283015a6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2475,44 +2475,6 @@ EXPORT_PER_CPU_SYMBOL(kstat);
 EXPORT_PER_CPU_SYMBOL(kernel_cpustat);
 
 /*
- * Return any ns on the sched_clock that have not yet been accounted in
- * @p in case that task is currently running.
- *
- * Called with task_rq_lock() held on @rq.
- */
-static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
-{
-	u64 ns = 0;
-
-	/*
-	 * Must be ->curr _and_ ->on_rq. If dequeued, we would
-	 * project cycles that may never be accounted to this
-	 * thread, breaking clock_gettime().
-	 */
-	if (task_current(rq, p) && task_on_rq_queued(p)) {
-		update_rq_clock(rq);
-		ns = rq_clock_task(rq) - p->se.exec_start;
-		if ((s64)ns < 0)
-			ns = 0;
-	}
-
-	return ns;
-}
-
-unsigned long long task_delta_exec(struct task_struct *p)
-{
-	unsigned long flags;
-	struct rq *rq;
-	u64 ns = 0;
-
-	rq = task_rq_lock(p, &flags);
-	ns = do_task_delta_exec(p, rq);
-	task_rq_unlock(rq, p, &flags);
-
-	return ns;
-}
-
-/*
  * Return accounted runtime for the task.
  * In case the task is currently running, return the runtime plus current's
  * pending runtime that have not been accounted yet.
@@ -2521,7 +2483,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
 {
 	unsigned long flags;
 	struct rq *rq;
-	u64 ns = 0;
+	u64 ns;
 
 #if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
 	/*
@@ -2540,7 +2502,16 @@ unsigned long long task_sched_runtime(struct task_struct *p)
 #endif
 
 	rq = task_rq_lock(p, &flags);
-	ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
+	/*
+	 * Must be ->curr _and_ ->on_rq. If dequeued, we would
+	 * project cycles that may never be accounted to this
+	 * thread, breaking clock_gettime().
+	 */
+	if (task_current(rq, p) && task_on_rq_queued(p)) {
+		update_rq_clock(rq);
+		p->sched_class->update_curr(rq);
+	}
+	ns = p->se.sum_exec_runtime;
 	task_rq_unlock(rq, p, &flags);
 
 	return ns;
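
Aside, not part of the diff: task_sched_runtime() backs the POSIX per-thread and per-process CPU-time clocks, so what this hunk changes is the value a clock_gettime() caller sees while the thread is still running. A minimal userspace sketch of such a caller, using only the standard POSIX API (nothing below is taken from the patch; link with -lrt on older glibc):

/* Illustrative userspace caller only. CLOCK_THREAD_CPUTIME_ID reads end up
 * in task_sched_runtime(), which now folds in the still-running delta via
 * p->sched_class->update_curr(rq) instead of the removed
 * do_task_delta_exec() helper.
 */
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	/* Burn a little CPU so the thread has not-yet-accounted runtime. */
	for (volatile unsigned long i = 0; i < 50000000UL; i++)
		;

	if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0) {
		perror("clock_gettime");
		return 1;
	}
	printf("thread cputime: %lld.%09ld s\n",
	       (long long)ts.tv_sec, ts.tv_nsec);
	return 0;
}
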
@@ -2903,10 +2874,14 @@ asmlinkage __visible void __sched schedule_user(void)
 	 * or we have been woken up remotely but the IPI has not yet arrived,
 	 * we haven't yet exited the RCU idle mode. Do it here manually until
 	 * we find a better solution.
+	 *
+	 * NB: There are buggy callers of this function. Ideally we
+	 * should warn if prev_state != IN_USER, but that will trigger
+	 * too frequently to make sense yet.
 	 */
-	user_exit();
+	enum ctx_state prev_state = exception_enter();
 	schedule();
-	user_enter();
+	exception_exit(prev_state);
 }
 #endif
 
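
Not part of the diff: exception_enter() saves the current context-tracking state and exception_exit() restores it, whereas the old user_exit()/user_enter() pair assumed the caller really came from user mode, which the new NB comment says is not always true. A toy model of that save/restore pattern, with invented names rather than the kernel API:

#include <stdio.h>

/* Invented names for illustration; not the kernel's context-tracking API. */
enum ctx_state_toy { IN_KERNEL_TOY, IN_USER_TOY };

static enum ctx_state_toy cur_state = IN_USER_TOY;

static enum ctx_state_toy exception_enter_toy(void)
{
	enum ctx_state_toy prev = cur_state;

	cur_state = IN_KERNEL_TOY;	/* always leave "user" tracking */
	return prev;
}

static void exception_exit_toy(enum ctx_state_toy prev)
{
	/* Restore whatever the caller was in, instead of blindly assuming
	 * user mode the way an unconditional user_enter() would. */
	cur_state = prev;
}

int main(void)
{
	/* Model a "buggy caller" that was not in user mode to begin with. */
	cur_state = IN_KERNEL_TOY;

	enum ctx_state_toy prev = exception_enter_toy();
	/* ... schedule() would run here ... */
	exception_exit_toy(prev);

	printf("state after exit: %s\n",
	       cur_state == IN_USER_TOY ? "IN_USER" : "IN_KERNEL");
	return 0;
}
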
@@ -2951,6 +2926,47 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
 }
 NOKPROBE_SYMBOL(preempt_schedule);
 EXPORT_SYMBOL(preempt_schedule);
+
+#ifdef CONFIG_CONTEXT_TRACKING
+/**
+ * preempt_schedule_context - preempt_schedule called by tracing
+ *
+ * The tracing infrastructure uses preempt_enable_notrace to prevent
+ * recursion and tracing preempt enabling caused by the tracing
+ * infrastructure itself. But as tracing can happen in areas coming
+ * from userspace or just about to enter userspace, a preempt enable
+ * can occur before user_exit() is called. This will cause the scheduler
+ * to be called when the system is still in usermode.
+ *
+ * To prevent this, the preempt_enable_notrace will use this function
+ * instead of preempt_schedule() to exit user context if needed before
+ * calling the scheduler.
+ */
+asmlinkage __visible void __sched notrace preempt_schedule_context(void)
+{
+	enum ctx_state prev_ctx;
+
+	if (likely(!preemptible()))
+		return;
+
+	do {
+		__preempt_count_add(PREEMPT_ACTIVE);
+		/*
+		 * Needs preempt disabled in case user_exit() is traced
+		 * and the tracer calls preempt_enable_notrace() causing
+		 * an infinite recursion.
+		 */
+		prev_ctx = exception_enter();
+		__schedule();
+		exception_exit(prev_ctx);
+
+		__preempt_count_sub(PREEMPT_ACTIVE);
+		barrier();
+	} while (need_resched());
+}
+EXPORT_SYMBOL_GPL(preempt_schedule_context);
+#endif /* CONFIG_CONTEXT_TRACKING */
+
 #endif /* CONFIG_PREEMPT */
 
 /*
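
An aside, outside the diff: the kernel-doc above describes a recursion hazard, where a traced call inside the reschedule path hits preempt_enable_notrace() and re-enters the scheduler; raising the preempt count first makes that re-entry bail out. A self-contained toy of the guard, using invented names rather than kernel functions:

#include <stdio.h>

/* Toy guard only; everything here is invented for illustration. */
static int preempt_count;

static void on_preempt_enable(void);

/* Stand-in for a traced call (e.g. the context-tracking entry) whose
 * tracer re-enables preemption and would re-enter the hook.
 */
static void traced_call(void)
{
	on_preempt_enable();
}

static void on_preempt_enable(void)
{
	if (preempt_count > 0) {
		puts("preempt count raised: skip reschedule, no recursion");
		return;
	}

	preempt_count++;	/* mirrors __preempt_count_add(PREEMPT_ACTIVE) */
	traced_call();		/* would recurse without the guard above */
	puts("__schedule()");
	preempt_count--;	/* mirrors __preempt_count_sub(PREEMPT_ACTIVE) */
}

int main(void)
{
	on_preempt_enable();
	return 0;
}
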
@@ -6327,6 +6343,10 @@ static void sched_init_numa(void)
 		if (!sched_debug())
 			break;
 	}
+
+	if (!level)
+		return;
+
 	/*
 	 * 'level' contains the number of unique distances, excluding the
 	 * identity distance node_distance(i,i).
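
For illustration only, outside the diff: the new !level bail-out appears to cover the degenerate case where no distance beyond the local/identity one is found, so there is nothing to build extra NUMA scheduling-domain levels from. A rough sketch of how such a table yields level == 0; the table and loop below are made up for the example, not lifted from sched_init_numa():

#include <stdio.h>
#include <stdbool.h>

#define NR_NODES 2

/* Degenerate distance table: every entry equals the local distance. */
static const int dist[NR_NODES][NR_NODES] = {
	{ 10, 10 },
	{ 10, 10 },
};

int main(void)
{
	int uniq[NR_NODES * NR_NODES];
	int level = 0;

	for (int i = 0; i < NR_NODES; i++) {
		for (int j = 0; j < NR_NODES; j++) {
			int d = dist[i][j];
			/* Skip the identity/local distance ... */
			bool seen = (d == dist[i][i]);

			/* ... and anything already counted. */
			for (int k = 0; k < level && !seen; k++)
				seen = (uniq[k] == d);
			if (!seen)
				uniq[level++] = d;
		}
	}

	/* Prints 0 here: no extra levels to size the NUMA topology with. */
	printf("level = %d\n", level);
	return 0;
}
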
@@ -7403,8 +7423,12 @@ void sched_move_task(struct task_struct *tsk)
 	if (unlikely(running))
 		put_prev_task(rq, tsk);
 
-	tg = container_of(task_css_check(tsk, cpu_cgrp_id,
-				lockdep_is_held(&tsk->sighand->siglock)),
+	/*
+	 * All callers are synchronized by task_rq_lock(); we do not use RCU
+	 * which is pointless here. Thus, we pass "true" to task_css_check()
+	 * to prevent lockdep warnings.
+	 */
+	tg = container_of(task_css_check(tsk, cpu_cgrp_id, true),
 			  struct task_group, css);
 	tg = autogroup_task_group(tsk, tg);
 	tsk->sched_task_group = tg;
@@ -7833,6 +7857,11 @@ static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css)
 	sched_offline_group(tg);
 }
 
+static void cpu_cgroup_fork(struct task_struct *task)
+{
+	sched_move_task(task);
+}
+
 static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css,
 				 struct cgroup_taskset *tset)
 {
@@ -8205,6 +8234,7 @@ struct cgroup_subsys cpu_cgrp_subsys = {
 	.css_free	= cpu_cgroup_css_free,
 	.css_online	= cpu_cgroup_css_online,
 	.css_offline	= cpu_cgroup_css_offline,
+	.fork		= cpu_cgroup_fork,
 	.can_attach	= cpu_cgroup_can_attach,
 	.attach		= cpu_cgroup_attach,
 	.exit		= cpu_cgroup_exit,
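
Outside the diff: hooking .fork up to cpu_cgroup_fork() has the scheduler run sched_move_task() on a newly forked task, so tsk->sched_task_group ends up matching the cpu cgroup the child was created in. A small userspace illustration of that membership inheritance; the /proc interface is standard, but nothing below comes from the patch itself:

#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

/* Print this process's cgroup membership as reported by the kernel. */
static void print_cgroup(const char *who)
{
	char line[256];
	FILE *f = fopen("/proc/self/cgroup", "r");

	if (!f)
		return;
	while (fgets(line, sizeof(line), f))
		printf("%s: %s", who, line);
	fclose(f);
}

int main(void)
{
	pid_t pid = fork();

	if (pid < 0) {
		perror("fork");
		return 1;
	}
	if (pid == 0) {
		/* The child should report the same cpu cgroup as the parent. */
		print_cgroup("child");
		_exit(0);
	}
	print_cgroup("parent");
	waitpid(pid, NULL, 0);
	return 0;
}
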