diff options
author | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-11-09 18:27:54 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-11-09 18:27:54 -0500 |
commit | a70a93229943c177f0062490b4f8e44be4cef685 (patch) | |
tree | 24cc6f087307f18cda2f55ad91c7649dd5388b86 | |
parent | a80b824f0b63fa3a8c269903828beb0837d738e7 (diff) | |
parent | e6fe6649b4ec11aa3075e394b4d8743eebe1f64c (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched
* git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched:
sched: proper prototype for kernel/sched.c:migration_init()
sched: avoid large irq-latencies in smp-balancing
sched: fix copy_namespace() <-> sched_fork() dependency in do_fork
sched: clean up the wakeup preempt check, #2
sched: clean up the wakeup preempt check
sched: wakeup preemption fix
sched: remove PREEMPT_RESTRICT
sched: turn off PREEMPT_RESTRICT
KVM: fix !SMP build error
x86: make nmi_cpu_busy() always defined
x86: make ipi_handler() always defined
sched: cleanup, use NSEC_PER_MSEC and NSEC_PER_SEC
sched: reintroduce SMP tunings again
sched: restore deterministic CPU accounting on powerpc
sched: fix delay accounting regression
sched: reintroduce the sched_min_granularity tunable
sched: documentation: place_entity() comments
sched: fix vslice
-rw-r--r-- | arch/powerpc/kernel/process.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kernel/time.c | 25 | ||||
-rw-r--r-- | arch/s390/kernel/time.c | 4 | ||||
-rw-r--r-- | arch/s390/kernel/vtime.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mtrr/main.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/nmi_32.c | 4 | ||||
-rw-r--r-- | include/linux/sched.h | 17 | ||||
-rw-r--r-- | include/linux/smp.h | 7 | ||||
-rw-r--r-- | init/main.c | 4 | ||||
-rw-r--r-- | kernel/fork.c | 6 | ||||
-rw-r--r-- | kernel/sched.c | 59 | ||||
-rw-r--r-- | kernel/sched_debug.c | 2 | ||||
-rw-r--r-- | kernel/sched_fair.c | 96 | ||||
-rw-r--r-- | kernel/sched_stats.h | 11 | ||||
-rw-r--r-- | kernel/sysctl.c | 23 | ||||
-rw-r--r-- | kernel/timer.c | 21 |
16 files changed, 172 insertions, 123 deletions
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index b9d88374f14f..41e13f4cc6e3 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c | |||
@@ -350,7 +350,7 @@ struct task_struct *__switch_to(struct task_struct *prev, | |||
350 | local_irq_save(flags); | 350 | local_irq_save(flags); |
351 | 351 | ||
352 | account_system_vtime(current); | 352 | account_system_vtime(current); |
353 | account_process_vtime(current); | 353 | account_process_tick(current, 0); |
354 | calculate_steal_time(); | 354 | calculate_steal_time(); |
355 | 355 | ||
356 | last = _switch(old_thread, new_thread); | 356 | last = _switch(old_thread, new_thread); |
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 99ebcd3884d2..4beb6329dfb7 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c | |||
@@ -259,7 +259,7 @@ void account_system_vtime(struct task_struct *tsk) | |||
259 | * user and system time records. | 259 | * user and system time records. |
260 | * Must be called with interrupts disabled. | 260 | * Must be called with interrupts disabled. |
261 | */ | 261 | */ |
262 | void account_process_vtime(struct task_struct *tsk) | 262 | void account_process_tick(struct task_struct *tsk, int user_tick) |
263 | { | 263 | { |
264 | cputime_t utime, utimescaled; | 264 | cputime_t utime, utimescaled; |
265 | 265 | ||
@@ -274,18 +274,6 @@ void account_process_vtime(struct task_struct *tsk) | |||
274 | account_user_time_scaled(tsk, utimescaled); | 274 | account_user_time_scaled(tsk, utimescaled); |
275 | } | 275 | } |
276 | 276 | ||
277 | static void account_process_time(struct pt_regs *regs) | ||
278 | { | ||
279 | int cpu = smp_processor_id(); | ||
280 | |||
281 | account_process_vtime(current); | ||
282 | run_local_timers(); | ||
283 | if (rcu_pending(cpu)) | ||
284 | rcu_check_callbacks(cpu, user_mode(regs)); | ||
285 | scheduler_tick(); | ||
286 | run_posix_cpu_timers(current); | ||
287 | } | ||
288 | |||
289 | /* | 277 | /* |
290 | * Stuff for accounting stolen time. | 278 | * Stuff for accounting stolen time. |
291 | */ | 279 | */ |
@@ -375,7 +363,6 @@ static void snapshot_purr(void) | |||
375 | 363 | ||
376 | #else /* ! CONFIG_VIRT_CPU_ACCOUNTING */ | 364 | #else /* ! CONFIG_VIRT_CPU_ACCOUNTING */ |
377 | #define calc_cputime_factors() | 365 | #define calc_cputime_factors() |
378 | #define account_process_time(regs) update_process_times(user_mode(regs)) | ||
379 | #define calculate_steal_time() do { } while (0) | 366 | #define calculate_steal_time() do { } while (0) |
380 | #endif | 367 | #endif |
381 | 368 | ||
@@ -599,16 +586,6 @@ void timer_interrupt(struct pt_regs * regs) | |||
599 | get_lppaca()->int_dword.fields.decr_int = 0; | 586 | get_lppaca()->int_dword.fields.decr_int = 0; |
600 | #endif | 587 | #endif |
601 | 588 | ||
602 | /* | ||
603 | * We cannot disable the decrementer, so in the period | ||
604 | * between this cpu's being marked offline in cpu_online_map | ||
605 | * and calling stop-self, it is taking timer interrupts. | ||
606 | * Avoid calling into the scheduler rebalancing code if this | ||
607 | * is the case. | ||
608 | */ | ||
609 | if (!cpu_is_offline(cpu)) | ||
610 | account_process_time(regs); | ||
611 | |||
612 | if (evt->event_handler) | 589 | if (evt->event_handler) |
613 | evt->event_handler(evt); | 590 | evt->event_handler(evt); |
614 | 591 | ||
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index a963fe81359e..22b800ce2126 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c | |||
@@ -145,12 +145,8 @@ void account_ticks(u64 time) | |||
145 | do_timer(ticks); | 145 | do_timer(ticks); |
146 | #endif | 146 | #endif |
147 | 147 | ||
148 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | ||
149 | account_tick_vtime(current); | ||
150 | #else | ||
151 | while (ticks--) | 148 | while (ticks--) |
152 | update_process_times(user_mode(get_irq_regs())); | 149 | update_process_times(user_mode(get_irq_regs())); |
153 | #endif | ||
154 | 150 | ||
155 | s390_do_profile(); | 151 | s390_do_profile(); |
156 | } | 152 | } |
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 84ff78de6bac..c5f05b3fb2c3 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c | |||
@@ -32,7 +32,7 @@ static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer); | |||
32 | * Update process times based on virtual cpu times stored by entry.S | 32 | * Update process times based on virtual cpu times stored by entry.S |
33 | * to the lowcore fields user_timer, system_timer & steal_clock. | 33 | * to the lowcore fields user_timer, system_timer & steal_clock. |
34 | */ | 34 | */ |
35 | void account_tick_vtime(struct task_struct *tsk) | 35 | void account_process_tick(struct task_struct *tsk, int user_tick) |
36 | { | 36 | { |
37 | cputime_t cputime; | 37 | cputime_t cputime; |
38 | __u64 timer, clock; | 38 | __u64 timer, clock; |
@@ -64,12 +64,6 @@ void account_tick_vtime(struct task_struct *tsk) | |||
64 | S390_lowcore.steal_clock -= cputime << 12; | 64 | S390_lowcore.steal_clock -= cputime << 12; |
65 | account_steal_time(tsk, cputime); | 65 | account_steal_time(tsk, cputime); |
66 | } | 66 | } |
67 | |||
68 | run_local_timers(); | ||
69 | if (rcu_pending(smp_processor_id())) | ||
70 | rcu_check_callbacks(smp_processor_id(), rcu_user_flag); | ||
71 | scheduler_tick(); | ||
72 | run_posix_cpu_timers(tsk); | ||
73 | } | 67 | } |
74 | 68 | ||
75 | /* | 69 | /* |
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 9abbdf7562c5..3b20613325dc 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -139,13 +139,12 @@ struct set_mtrr_data { | |||
139 | mtrr_type smp_type; | 139 | mtrr_type smp_type; |
140 | }; | 140 | }; |
141 | 141 | ||
142 | #ifdef CONFIG_SMP | ||
143 | |||
144 | static void ipi_handler(void *info) | 142 | static void ipi_handler(void *info) |
145 | /* [SUMMARY] Synchronisation handler. Executed by "other" CPUs. | 143 | /* [SUMMARY] Synchronisation handler. Executed by "other" CPUs. |
146 | [RETURNS] Nothing. | 144 | [RETURNS] Nothing. |
147 | */ | 145 | */ |
148 | { | 146 | { |
147 | #ifdef CONFIG_SMP | ||
149 | struct set_mtrr_data *data = info; | 148 | struct set_mtrr_data *data = info; |
150 | unsigned long flags; | 149 | unsigned long flags; |
151 | 150 | ||
@@ -168,9 +167,8 @@ static void ipi_handler(void *info) | |||
168 | 167 | ||
169 | atomic_dec(&data->count); | 168 | atomic_dec(&data->count); |
170 | local_irq_restore(flags); | 169 | local_irq_restore(flags); |
171 | } | ||
172 | |||
173 | #endif | 170 | #endif |
171 | } | ||
174 | 172 | ||
175 | static inline int types_compatible(mtrr_type type1, mtrr_type type2) { | 173 | static inline int types_compatible(mtrr_type type1, mtrr_type type2) { |
176 | return type1 == MTRR_TYPE_UNCACHABLE || | 174 | return type1 == MTRR_TYPE_UNCACHABLE || |
diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index f803ed0ed1c4..600fd404e440 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c | |||
@@ -51,13 +51,13 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); | |||
51 | 51 | ||
52 | static int endflag __initdata = 0; | 52 | static int endflag __initdata = 0; |
53 | 53 | ||
54 | #ifdef CONFIG_SMP | ||
55 | /* The performance counters used by NMI_LOCAL_APIC don't trigger when | 54 | /* The performance counters used by NMI_LOCAL_APIC don't trigger when |
56 | * the CPU is idle. To make sure the NMI watchdog really ticks on all | 55 | * the CPU is idle. To make sure the NMI watchdog really ticks on all |
57 | * CPUs during the test make them busy. | 56 | * CPUs during the test make them busy. |
58 | */ | 57 | */ |
59 | static __init void nmi_cpu_busy(void *data) | 58 | static __init void nmi_cpu_busy(void *data) |
60 | { | 59 | { |
60 | #ifdef CONFIG_SMP | ||
61 | local_irq_enable_in_hardirq(); | 61 | local_irq_enable_in_hardirq(); |
62 | /* Intentionally don't use cpu_relax here. This is | 62 | /* Intentionally don't use cpu_relax here. This is |
63 | to make sure that the performance counter really ticks, | 63 | to make sure that the performance counter really ticks, |
@@ -67,8 +67,8 @@ static __init void nmi_cpu_busy(void *data) | |||
67 | care if they get somewhat less cycles. */ | 67 | care if they get somewhat less cycles. */ |
68 | while (endflag == 0) | 68 | while (endflag == 0) |
69 | mb(); | 69 | mb(); |
70 | } | ||
71 | #endif | 70 | #endif |
71 | } | ||
72 | 72 | ||
73 | static int __init check_nmi_watchdog(void) | 73 | static int __init check_nmi_watchdog(void) |
74 | { | 74 | { |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 155d7438f7ad..ee800e7a70de 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -254,6 +254,7 @@ long io_schedule_timeout(long timeout); | |||
254 | 254 | ||
255 | extern void cpu_init (void); | 255 | extern void cpu_init (void); |
256 | extern void trap_init(void); | 256 | extern void trap_init(void); |
257 | extern void account_process_tick(struct task_struct *task, int user); | ||
257 | extern void update_process_times(int user); | 258 | extern void update_process_times(int user); |
258 | extern void scheduler_tick(void); | 259 | extern void scheduler_tick(void); |
259 | 260 | ||
@@ -862,7 +863,6 @@ struct sched_entity { | |||
862 | struct load_weight load; /* for load-balancing */ | 863 | struct load_weight load; /* for load-balancing */ |
863 | struct rb_node run_node; | 864 | struct rb_node run_node; |
864 | unsigned int on_rq; | 865 | unsigned int on_rq; |
865 | int peer_preempt; | ||
866 | 866 | ||
867 | u64 exec_start; | 867 | u64 exec_start; |
868 | u64 sum_exec_runtime; | 868 | u64 sum_exec_runtime; |
@@ -1460,12 +1460,17 @@ extern void sched_idle_next(void); | |||
1460 | 1460 | ||
1461 | #ifdef CONFIG_SCHED_DEBUG | 1461 | #ifdef CONFIG_SCHED_DEBUG |
1462 | extern unsigned int sysctl_sched_latency; | 1462 | extern unsigned int sysctl_sched_latency; |
1463 | extern unsigned int sysctl_sched_nr_latency; | 1463 | extern unsigned int sysctl_sched_min_granularity; |
1464 | extern unsigned int sysctl_sched_wakeup_granularity; | 1464 | extern unsigned int sysctl_sched_wakeup_granularity; |
1465 | extern unsigned int sysctl_sched_batch_wakeup_granularity; | 1465 | extern unsigned int sysctl_sched_batch_wakeup_granularity; |
1466 | extern unsigned int sysctl_sched_child_runs_first; | 1466 | extern unsigned int sysctl_sched_child_runs_first; |
1467 | extern unsigned int sysctl_sched_features; | 1467 | extern unsigned int sysctl_sched_features; |
1468 | extern unsigned int sysctl_sched_migration_cost; | 1468 | extern unsigned int sysctl_sched_migration_cost; |
1469 | extern unsigned int sysctl_sched_nr_migrate; | ||
1470 | |||
1471 | int sched_nr_latency_handler(struct ctl_table *table, int write, | ||
1472 | struct file *file, void __user *buffer, size_t *length, | ||
1473 | loff_t *ppos); | ||
1469 | #endif | 1474 | #endif |
1470 | 1475 | ||
1471 | extern unsigned int sysctl_sched_compat_yield; | 1476 | extern unsigned int sysctl_sched_compat_yield; |
@@ -1983,6 +1988,14 @@ static inline void inc_syscw(struct task_struct *tsk) | |||
1983 | } | 1988 | } |
1984 | #endif | 1989 | #endif |
1985 | 1990 | ||
1991 | #ifdef CONFIG_SMP | ||
1992 | void migration_init(void); | ||
1993 | #else | ||
1994 | static inline void migration_init(void) | ||
1995 | { | ||
1996 | } | ||
1997 | #endif | ||
1998 | |||
1986 | #endif /* __KERNEL__ */ | 1999 | #endif /* __KERNEL__ */ |
1987 | 2000 | ||
1988 | #endif | 2001 | #endif |
diff --git a/include/linux/smp.h b/include/linux/smp.h index 259a13c3bd98..c25e66bcecf3 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h | |||
@@ -84,11 +84,12 @@ void smp_prepare_boot_cpu(void); | |||
84 | * These macros fold the SMP functionality into a single CPU system | 84 | * These macros fold the SMP functionality into a single CPU system |
85 | */ | 85 | */ |
86 | #define raw_smp_processor_id() 0 | 86 | #define raw_smp_processor_id() 0 |
87 | static inline int up_smp_call_function(void) | 87 | static inline int up_smp_call_function(void (*func)(void *), void *info) |
88 | { | 88 | { |
89 | return 0; | 89 | return 0; |
90 | } | 90 | } |
91 | #define smp_call_function(func,info,retry,wait) (up_smp_call_function()) | 91 | #define smp_call_function(func, info, retry, wait) \ |
92 | (up_smp_call_function(func, info)) | ||
92 | #define on_each_cpu(func,info,retry,wait) \ | 93 | #define on_each_cpu(func,info,retry,wait) \ |
93 | ({ \ | 94 | ({ \ |
94 | local_irq_disable(); \ | 95 | local_irq_disable(); \ |
@@ -107,6 +108,8 @@ static inline void smp_send_reschedule(int cpu) { } | |||
107 | local_irq_enable(); \ | 108 | local_irq_enable(); \ |
108 | 0; \ | 109 | 0; \ |
109 | }) | 110 | }) |
111 | #define smp_call_function_mask(mask, func, info, wait) \ | ||
112 | (up_smp_call_function(func, info)) | ||
110 | 113 | ||
111 | #endif /* !SMP */ | 114 | #endif /* !SMP */ |
112 | 115 | ||
diff --git a/init/main.c b/init/main.c index f605a969ea61..80b04b6c5157 100644 --- a/init/main.c +++ b/init/main.c | |||
@@ -56,6 +56,7 @@ | |||
56 | #include <linux/pid_namespace.h> | 56 | #include <linux/pid_namespace.h> |
57 | #include <linux/device.h> | 57 | #include <linux/device.h> |
58 | #include <linux/kthread.h> | 58 | #include <linux/kthread.h> |
59 | #include <linux/sched.h> | ||
59 | 60 | ||
60 | #include <asm/io.h> | 61 | #include <asm/io.h> |
61 | #include <asm/bugs.h> | 62 | #include <asm/bugs.h> |
@@ -747,11 +748,8 @@ __setup("nosoftlockup", nosoftlockup_setup); | |||
747 | static void __init do_pre_smp_initcalls(void) | 748 | static void __init do_pre_smp_initcalls(void) |
748 | { | 749 | { |
749 | extern int spawn_ksoftirqd(void); | 750 | extern int spawn_ksoftirqd(void); |
750 | #ifdef CONFIG_SMP | ||
751 | extern int migration_init(void); | ||
752 | 751 | ||
753 | migration_init(); | 752 | migration_init(); |
754 | #endif | ||
755 | spawn_ksoftirqd(); | 753 | spawn_ksoftirqd(); |
756 | if (!nosoftlockup) | 754 | if (!nosoftlockup) |
757 | spawn_softlockup_task(); | 755 | spawn_softlockup_task(); |
diff --git a/kernel/fork.c b/kernel/fork.c index 28a740151988..8ca1a14cdc8c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -1123,6 +1123,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1123 | p->blocked_on = NULL; /* not blocked yet */ | 1123 | p->blocked_on = NULL; /* not blocked yet */ |
1124 | #endif | 1124 | #endif |
1125 | 1125 | ||
1126 | /* Perform scheduler related setup. Assign this task to a CPU. */ | ||
1127 | sched_fork(p, clone_flags); | ||
1128 | |||
1126 | if ((retval = security_task_alloc(p))) | 1129 | if ((retval = security_task_alloc(p))) |
1127 | goto bad_fork_cleanup_policy; | 1130 | goto bad_fork_cleanup_policy; |
1128 | if ((retval = audit_alloc(p))) | 1131 | if ((retval = audit_alloc(p))) |
@@ -1212,9 +1215,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1212 | INIT_LIST_HEAD(&p->ptrace_children); | 1215 | INIT_LIST_HEAD(&p->ptrace_children); |
1213 | INIT_LIST_HEAD(&p->ptrace_list); | 1216 | INIT_LIST_HEAD(&p->ptrace_list); |
1214 | 1217 | ||
1215 | /* Perform scheduler related setup. Assign this task to a CPU. */ | ||
1216 | sched_fork(p, clone_flags); | ||
1217 | |||
1218 | /* Now that the task is set up, run cgroup callbacks if | 1218 | /* Now that the task is set up, run cgroup callbacks if |
1219 | * necessary. We need to run them before the task is visible | 1219 | * necessary. We need to run them before the task is visible |
1220 | * on the tasklist. */ | 1220 | * on the tasklist. */ |
diff --git a/kernel/sched.c b/kernel/sched.c index 3f6bd1112900..b18f231a4875 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -75,7 +75,7 @@ | |||
75 | */ | 75 | */ |
76 | unsigned long long __attribute__((weak)) sched_clock(void) | 76 | unsigned long long __attribute__((weak)) sched_clock(void) |
77 | { | 77 | { |
78 | return (unsigned long long)jiffies * (1000000000 / HZ); | 78 | return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ); |
79 | } | 79 | } |
80 | 80 | ||
81 | /* | 81 | /* |
@@ -99,8 +99,8 @@ unsigned long long __attribute__((weak)) sched_clock(void) | |||
99 | /* | 99 | /* |
100 | * Some helpers for converting nanosecond timing to jiffy resolution | 100 | * Some helpers for converting nanosecond timing to jiffy resolution |
101 | */ | 101 | */ |
102 | #define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (1000000000 / HZ)) | 102 | #define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ)) |
103 | #define JIFFIES_TO_NS(TIME) ((TIME) * (1000000000 / HZ)) | 103 | #define JIFFIES_TO_NS(TIME) ((TIME) * (NSEC_PER_SEC / HZ)) |
104 | 104 | ||
105 | #define NICE_0_LOAD SCHED_LOAD_SCALE | 105 | #define NICE_0_LOAD SCHED_LOAD_SCALE |
106 | #define NICE_0_SHIFT SCHED_LOAD_SHIFT | 106 | #define NICE_0_SHIFT SCHED_LOAD_SHIFT |
@@ -460,7 +460,6 @@ enum { | |||
460 | SCHED_FEAT_TREE_AVG = 4, | 460 | SCHED_FEAT_TREE_AVG = 4, |
461 | SCHED_FEAT_APPROX_AVG = 8, | 461 | SCHED_FEAT_APPROX_AVG = 8, |
462 | SCHED_FEAT_WAKEUP_PREEMPT = 16, | 462 | SCHED_FEAT_WAKEUP_PREEMPT = 16, |
463 | SCHED_FEAT_PREEMPT_RESTRICT = 32, | ||
464 | }; | 463 | }; |
465 | 464 | ||
466 | const_debug unsigned int sysctl_sched_features = | 465 | const_debug unsigned int sysctl_sched_features = |
@@ -468,12 +467,17 @@ const_debug unsigned int sysctl_sched_features = | |||
468 | SCHED_FEAT_START_DEBIT * 1 | | 467 | SCHED_FEAT_START_DEBIT * 1 | |
469 | SCHED_FEAT_TREE_AVG * 0 | | 468 | SCHED_FEAT_TREE_AVG * 0 | |
470 | SCHED_FEAT_APPROX_AVG * 0 | | 469 | SCHED_FEAT_APPROX_AVG * 0 | |
471 | SCHED_FEAT_WAKEUP_PREEMPT * 1 | | 470 | SCHED_FEAT_WAKEUP_PREEMPT * 1; |
472 | SCHED_FEAT_PREEMPT_RESTRICT * 1; | ||
473 | 471 | ||
474 | #define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x) | 472 | #define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x) |
475 | 473 | ||
476 | /* | 474 | /* |
475 | * Number of tasks to iterate in a single balance run. | ||
476 | * Limited because this is done with IRQs disabled. | ||
477 | */ | ||
478 | const_debug unsigned int sysctl_sched_nr_migrate = 32; | ||
479 | |||
480 | /* | ||
477 | * For kernel-internal use: high-speed (but slightly incorrect) per-cpu | 481 | * For kernel-internal use: high-speed (but slightly incorrect) per-cpu |
478 | * clock constructed from sched_clock(): | 482 | * clock constructed from sched_clock(): |
479 | */ | 483 | */ |
@@ -2237,7 +2241,7 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
2237 | enum cpu_idle_type idle, int *all_pinned, | 2241 | enum cpu_idle_type idle, int *all_pinned, |
2238 | int *this_best_prio, struct rq_iterator *iterator) | 2242 | int *this_best_prio, struct rq_iterator *iterator) |
2239 | { | 2243 | { |
2240 | int pulled = 0, pinned = 0, skip_for_load; | 2244 | int loops = 0, pulled = 0, pinned = 0, skip_for_load; |
2241 | struct task_struct *p; | 2245 | struct task_struct *p; |
2242 | long rem_load_move = max_load_move; | 2246 | long rem_load_move = max_load_move; |
2243 | 2247 | ||
@@ -2251,10 +2255,10 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
2251 | */ | 2255 | */ |
2252 | p = iterator->start(iterator->arg); | 2256 | p = iterator->start(iterator->arg); |
2253 | next: | 2257 | next: |
2254 | if (!p) | 2258 | if (!p || loops++ > sysctl_sched_nr_migrate) |
2255 | goto out; | 2259 | goto out; |
2256 | /* | 2260 | /* |
2257 | * To help distribute high priority tasks accross CPUs we don't | 2261 | * To help distribute high priority tasks across CPUs we don't |
2258 | * skip a task if it will be the highest priority task (i.e. smallest | 2262 | * skip a task if it will be the highest priority task (i.e. smallest |
2259 | * prio value) on its new queue regardless of its load weight | 2263 | * prio value) on its new queue regardless of its load weight |
2260 | */ | 2264 | */ |
@@ -2271,8 +2275,7 @@ next: | |||
2271 | rem_load_move -= p->se.load.weight; | 2275 | rem_load_move -= p->se.load.weight; |
2272 | 2276 | ||
2273 | /* | 2277 | /* |
2274 | * We only want to steal up to the prescribed number of tasks | 2278 | * We only want to steal up to the prescribed amount of weighted load. |
2275 | * and the prescribed amount of weighted load. | ||
2276 | */ | 2279 | */ |
2277 | if (rem_load_move > 0) { | 2280 | if (rem_load_move > 0) { |
2278 | if (p->prio < *this_best_prio) | 2281 | if (p->prio < *this_best_prio) |
@@ -4992,6 +4995,32 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
4992 | */ | 4995 | */ |
4993 | cpumask_t nohz_cpu_mask = CPU_MASK_NONE; | 4996 | cpumask_t nohz_cpu_mask = CPU_MASK_NONE; |
4994 | 4997 | ||
4998 | /* | ||
4999 | * Increase the granularity value when there are more CPUs, | ||
5000 | * because with more CPUs the 'effective latency' as visible | ||
5001 | * to users decreases. But the relationship is not linear, | ||
5002 | * so pick a second-best guess by going with the log2 of the | ||
5003 | * number of CPUs. | ||
5004 | * | ||
5005 | * This idea comes from the SD scheduler of Con Kolivas: | ||
5006 | */ | ||
5007 | static inline void sched_init_granularity(void) | ||
5008 | { | ||
5009 | unsigned int factor = 1 + ilog2(num_online_cpus()); | ||
5010 | const unsigned long limit = 200000000; | ||
5011 | |||
5012 | sysctl_sched_min_granularity *= factor; | ||
5013 | if (sysctl_sched_min_granularity > limit) | ||
5014 | sysctl_sched_min_granularity = limit; | ||
5015 | |||
5016 | sysctl_sched_latency *= factor; | ||
5017 | if (sysctl_sched_latency > limit) | ||
5018 | sysctl_sched_latency = limit; | ||
5019 | |||
5020 | sysctl_sched_wakeup_granularity *= factor; | ||
5021 | sysctl_sched_batch_wakeup_granularity *= factor; | ||
5022 | } | ||
5023 | |||
4995 | #ifdef CONFIG_SMP | 5024 | #ifdef CONFIG_SMP |
4996 | /* | 5025 | /* |
4997 | * This is how migration works: | 5026 | * This is how migration works: |
@@ -5621,7 +5650,7 @@ static struct notifier_block __cpuinitdata migration_notifier = { | |||
5621 | .priority = 10 | 5650 | .priority = 10 |
5622 | }; | 5651 | }; |
5623 | 5652 | ||
5624 | int __init migration_init(void) | 5653 | void __init migration_init(void) |
5625 | { | 5654 | { |
5626 | void *cpu = (void *)(long)smp_processor_id(); | 5655 | void *cpu = (void *)(long)smp_processor_id(); |
5627 | int err; | 5656 | int err; |
@@ -5631,8 +5660,6 @@ int __init migration_init(void) | |||
5631 | BUG_ON(err == NOTIFY_BAD); | 5660 | BUG_ON(err == NOTIFY_BAD); |
5632 | migration_call(&migration_notifier, CPU_ONLINE, cpu); | 5661 | migration_call(&migration_notifier, CPU_ONLINE, cpu); |
5633 | register_cpu_notifier(&migration_notifier); | 5662 | register_cpu_notifier(&migration_notifier); |
5634 | |||
5635 | return 0; | ||
5636 | } | 5663 | } |
5637 | #endif | 5664 | #endif |
5638 | 5665 | ||
@@ -6688,10 +6715,12 @@ void __init sched_init_smp(void) | |||
6688 | /* Move init over to a non-isolated CPU */ | 6715 | /* Move init over to a non-isolated CPU */ |
6689 | if (set_cpus_allowed(current, non_isolated_cpus) < 0) | 6716 | if (set_cpus_allowed(current, non_isolated_cpus) < 0) |
6690 | BUG(); | 6717 | BUG(); |
6718 | sched_init_granularity(); | ||
6691 | } | 6719 | } |
6692 | #else | 6720 | #else |
6693 | void __init sched_init_smp(void) | 6721 | void __init sched_init_smp(void) |
6694 | { | 6722 | { |
6723 | sched_init_granularity(); | ||
6695 | } | 6724 | } |
6696 | #endif /* CONFIG_SMP */ | 6725 | #endif /* CONFIG_SMP */ |
6697 | 6726 | ||
@@ -7228,7 +7257,7 @@ static u64 cpu_usage_read(struct cgroup *cgrp, struct cftype *cft) | |||
7228 | spin_unlock_irqrestore(&cpu_rq(i)->lock, flags); | 7257 | spin_unlock_irqrestore(&cpu_rq(i)->lock, flags); |
7229 | } | 7258 | } |
7230 | /* Convert from ns to ms */ | 7259 | /* Convert from ns to ms */ |
7231 | do_div(res, 1000000); | 7260 | do_div(res, NSEC_PER_MSEC); |
7232 | 7261 | ||
7233 | return res; | 7262 | return res; |
7234 | } | 7263 | } |
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 415e5c385542..ca198a797bfa 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c | |||
@@ -211,7 +211,7 @@ static int sched_debug_show(struct seq_file *m, void *v) | |||
211 | #define PN(x) \ | 211 | #define PN(x) \ |
212 | SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) | 212 | SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) |
213 | PN(sysctl_sched_latency); | 213 | PN(sysctl_sched_latency); |
214 | PN(sysctl_sched_nr_latency); | 214 | PN(sysctl_sched_min_granularity); |
215 | PN(sysctl_sched_wakeup_granularity); | 215 | PN(sysctl_sched_wakeup_granularity); |
216 | PN(sysctl_sched_batch_wakeup_granularity); | 216 | PN(sysctl_sched_batch_wakeup_granularity); |
217 | PN(sysctl_sched_child_runs_first); | 217 | PN(sysctl_sched_child_runs_first); |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 01859f662ab7..d3c03070872d 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -22,7 +22,7 @@ | |||
22 | 22 | ||
23 | /* | 23 | /* |
24 | * Targeted preemption latency for CPU-bound tasks: | 24 | * Targeted preemption latency for CPU-bound tasks: |
25 | * (default: 20ms, units: nanoseconds) | 25 | * (default: 20ms * ilog(ncpus), units: nanoseconds) |
26 | * | 26 | * |
27 | * NOTE: this latency value is not the same as the concept of | 27 | * NOTE: this latency value is not the same as the concept of |
28 | * 'timeslice length' - timeslices in CFS are of variable length | 28 | * 'timeslice length' - timeslices in CFS are of variable length |
@@ -32,19 +32,24 @@ | |||
32 | * (to see the precise effective timeslice length of your workload, | 32 | * (to see the precise effective timeslice length of your workload, |
33 | * run vmstat and monitor the context-switches (cs) field) | 33 | * run vmstat and monitor the context-switches (cs) field) |
34 | */ | 34 | */ |
35 | const_debug unsigned int sysctl_sched_latency = 20000000ULL; | 35 | unsigned int sysctl_sched_latency = 20000000ULL; |
36 | 36 | ||
37 | /* | 37 | /* |
38 | * After fork, child runs first. (default) If set to 0 then | 38 | * Minimal preemption granularity for CPU-bound tasks: |
39 | * parent will (try to) run first. | 39 | * (default: 1 msec * ilog(ncpus), units: nanoseconds) |
40 | */ | 40 | */ |
41 | const_debug unsigned int sysctl_sched_child_runs_first = 1; | 41 | unsigned int sysctl_sched_min_granularity = 1000000ULL; |
42 | 42 | ||
43 | /* | 43 | /* |
44 | * Minimal preemption granularity for CPU-bound tasks: | 44 | * is kept at sysctl_sched_latency / sysctl_sched_min_granularity |
45 | * (default: 2 msec, units: nanoseconds) | 45 | */ |
46 | unsigned int sched_nr_latency = 20; | ||
47 | |||
48 | /* | ||
49 | * After fork, child runs first. (default) If set to 0 then | ||
50 | * parent will (try to) run first. | ||
46 | */ | 51 | */ |
47 | const_debug unsigned int sysctl_sched_nr_latency = 20; | 52 | const_debug unsigned int sysctl_sched_child_runs_first = 1; |
48 | 53 | ||
49 | /* | 54 | /* |
50 | * sys_sched_yield() compat mode | 55 | * sys_sched_yield() compat mode |
@@ -56,23 +61,23 @@ unsigned int __read_mostly sysctl_sched_compat_yield; | |||
56 | 61 | ||
57 | /* | 62 | /* |
58 | * SCHED_BATCH wake-up granularity. | 63 | * SCHED_BATCH wake-up granularity. |
59 | * (default: 10 msec, units: nanoseconds) | 64 | * (default: 10 msec * ilog(ncpus), units: nanoseconds) |
60 | * | 65 | * |
61 | * This option delays the preemption effects of decoupled workloads | 66 | * This option delays the preemption effects of decoupled workloads |
62 | * and reduces their over-scheduling. Synchronous workloads will still | 67 | * and reduces their over-scheduling. Synchronous workloads will still |
63 | * have immediate wakeup/sleep latencies. | 68 | * have immediate wakeup/sleep latencies. |
64 | */ | 69 | */ |
65 | const_debug unsigned int sysctl_sched_batch_wakeup_granularity = 10000000UL; | 70 | unsigned int sysctl_sched_batch_wakeup_granularity = 10000000UL; |
66 | 71 | ||
67 | /* | 72 | /* |
68 | * SCHED_OTHER wake-up granularity. | 73 | * SCHED_OTHER wake-up granularity. |
69 | * (default: 10 msec, units: nanoseconds) | 74 | * (default: 10 msec * ilog(ncpus), units: nanoseconds) |
70 | * | 75 | * |
71 | * This option delays the preemption effects of decoupled workloads | 76 | * This option delays the preemption effects of decoupled workloads |
72 | * and reduces their over-scheduling. Synchronous workloads will still | 77 | * and reduces their over-scheduling. Synchronous workloads will still |
73 | * have immediate wakeup/sleep latencies. | 78 | * have immediate wakeup/sleep latencies. |
74 | */ | 79 | */ |
75 | const_debug unsigned int sysctl_sched_wakeup_granularity = 10000000UL; | 80 | unsigned int sysctl_sched_wakeup_granularity = 10000000UL; |
76 | 81 | ||
77 | const_debug unsigned int sysctl_sched_migration_cost = 500000UL; | 82 | const_debug unsigned int sysctl_sched_migration_cost = 500000UL; |
78 | 83 | ||
@@ -212,6 +217,22 @@ static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) | |||
212 | * Scheduling class statistics methods: | 217 | * Scheduling class statistics methods: |
213 | */ | 218 | */ |
214 | 219 | ||
220 | #ifdef CONFIG_SCHED_DEBUG | ||
221 | int sched_nr_latency_handler(struct ctl_table *table, int write, | ||
222 | struct file *filp, void __user *buffer, size_t *lenp, | ||
223 | loff_t *ppos) | ||
224 | { | ||
225 | int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); | ||
226 | |||
227 | if (ret || !write) | ||
228 | return ret; | ||
229 | |||
230 | sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency, | ||
231 | sysctl_sched_min_granularity); | ||
232 | |||
233 | return 0; | ||
234 | } | ||
235 | #endif | ||
215 | 236 | ||
216 | /* | 237 | /* |
217 | * The idea is to set a period in which each task runs once. | 238 | * The idea is to set a period in which each task runs once. |
@@ -224,7 +245,7 @@ static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) | |||
224 | static u64 __sched_period(unsigned long nr_running) | 245 | static u64 __sched_period(unsigned long nr_running) |
225 | { | 246 | { |
226 | u64 period = sysctl_sched_latency; | 247 | u64 period = sysctl_sched_latency; |
227 | unsigned long nr_latency = sysctl_sched_nr_latency; | 248 | unsigned long nr_latency = sched_nr_latency; |
228 | 249 | ||
229 | if (unlikely(nr_running > nr_latency)) { | 250 | if (unlikely(nr_running > nr_latency)) { |
230 | period *= nr_running; | 251 | period *= nr_running; |
@@ -259,6 +280,7 @@ static u64 __sched_vslice(unsigned long rq_weight, unsigned long nr_running) | |||
259 | { | 280 | { |
260 | u64 vslice = __sched_period(nr_running); | 281 | u64 vslice = __sched_period(nr_running); |
261 | 282 | ||
283 | vslice *= NICE_0_LOAD; | ||
262 | do_div(vslice, rq_weight); | 284 | do_div(vslice, rq_weight); |
263 | 285 | ||
264 | return vslice; | 286 | return vslice; |
@@ -472,19 +494,26 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) | |||
472 | } else if (sched_feat(APPROX_AVG) && cfs_rq->nr_running) | 494 | } else if (sched_feat(APPROX_AVG) && cfs_rq->nr_running) |
473 | vruntime += sched_vslice(cfs_rq)/2; | 495 | vruntime += sched_vslice(cfs_rq)/2; |
474 | 496 | ||
497 | /* | ||
498 | * The 'current' period is already promised to the current tasks, | ||
499 | * however the extra weight of the new task will slow them down a | ||
500 | * little; place the new task so that it fits in the slot that | ||
501 | * stays open at the end. | ||
502 | */ | ||
475 | if (initial && sched_feat(START_DEBIT)) | 503 | if (initial && sched_feat(START_DEBIT)) |
476 | vruntime += sched_vslice_add(cfs_rq, se); | 504 | vruntime += sched_vslice_add(cfs_rq, se); |
477 | 505 | ||
478 | if (!initial) { | 506 | if (!initial) { |
507 | /* sleeps up to a single latency don't count. */ | ||
479 | if (sched_feat(NEW_FAIR_SLEEPERS) && entity_is_task(se) && | 508 | if (sched_feat(NEW_FAIR_SLEEPERS) && entity_is_task(se) && |
480 | task_of(se)->policy != SCHED_BATCH) | 509 | task_of(se)->policy != SCHED_BATCH) |
481 | vruntime -= sysctl_sched_latency; | 510 | vruntime -= sysctl_sched_latency; |
482 | 511 | ||
483 | vruntime = max_t(s64, vruntime, se->vruntime); | 512 | /* ensure we never gain time by being placed backwards. */ |
513 | vruntime = max_vruntime(se->vruntime, vruntime); | ||
484 | } | 514 | } |
485 | 515 | ||
486 | se->vruntime = vruntime; | 516 | se->vruntime = vruntime; |
487 | |||
488 | } | 517 | } |
489 | 518 | ||
490 | static void | 519 | static void |
@@ -517,7 +546,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) | |||
517 | 546 | ||
518 | update_stats_dequeue(cfs_rq, se); | 547 | update_stats_dequeue(cfs_rq, se); |
519 | if (sleep) { | 548 | if (sleep) { |
520 | se->peer_preempt = 0; | ||
521 | #ifdef CONFIG_SCHEDSTATS | 549 | #ifdef CONFIG_SCHEDSTATS |
522 | if (entity_is_task(se)) { | 550 | if (entity_is_task(se)) { |
523 | struct task_struct *tsk = task_of(se); | 551 | struct task_struct *tsk = task_of(se); |
@@ -545,10 +573,8 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) | |||
545 | 573 | ||
546 | ideal_runtime = sched_slice(cfs_rq, curr); | 574 | ideal_runtime = sched_slice(cfs_rq, curr); |
547 | delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; | 575 | delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; |
548 | if (delta_exec > ideal_runtime || | 576 | if (delta_exec > ideal_runtime) |
549 | (sched_feat(PREEMPT_RESTRICT) && curr->peer_preempt)) | ||
550 | resched_task(rq_of(cfs_rq)->curr); | 577 | resched_task(rq_of(cfs_rq)->curr); |
551 | curr->peer_preempt = 0; | ||
552 | } | 578 | } |
553 | 579 | ||
554 | static void | 580 | static void |
@@ -811,7 +837,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p) | |||
811 | struct task_struct *curr = rq->curr; | 837 | struct task_struct *curr = rq->curr; |
812 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); | 838 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); |
813 | struct sched_entity *se = &curr->se, *pse = &p->se; | 839 | struct sched_entity *se = &curr->se, *pse = &p->se; |
814 | s64 delta, gran; | 840 | unsigned long gran; |
815 | 841 | ||
816 | if (unlikely(rt_prio(p->prio))) { | 842 | if (unlikely(rt_prio(p->prio))) { |
817 | update_rq_clock(rq); | 843 | update_rq_clock(rq); |
@@ -826,24 +852,20 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p) | |||
826 | if (unlikely(p->policy == SCHED_BATCH)) | 852 | if (unlikely(p->policy == SCHED_BATCH)) |
827 | return; | 853 | return; |
828 | 854 | ||
829 | if (sched_feat(WAKEUP_PREEMPT)) { | 855 | if (!sched_feat(WAKEUP_PREEMPT)) |
830 | while (!is_same_group(se, pse)) { | 856 | return; |
831 | se = parent_entity(se); | ||
832 | pse = parent_entity(pse); | ||
833 | } | ||
834 | 857 | ||
835 | delta = se->vruntime - pse->vruntime; | 858 | while (!is_same_group(se, pse)) { |
836 | gran = sysctl_sched_wakeup_granularity; | 859 | se = parent_entity(se); |
837 | if (unlikely(se->load.weight != NICE_0_LOAD)) | 860 | pse = parent_entity(pse); |
838 | gran = calc_delta_fair(gran, &se->load); | 861 | } |
839 | 862 | ||
840 | if (delta > gran) { | 863 | gran = sysctl_sched_wakeup_granularity; |
841 | int now = !sched_feat(PREEMPT_RESTRICT); | 864 | if (unlikely(se->load.weight != NICE_0_LOAD)) |
865 | gran = calc_delta_fair(gran, &se->load); | ||
842 | 866 | ||
843 | if (now || p->prio < curr->prio || !se->peer_preempt++) | 867 | if (pse->vruntime + gran < se->vruntime) |
844 | resched_task(curr); | 868 | resched_task(curr); |
845 | } | ||
846 | } | ||
847 | } | 869 | } |
848 | 870 | ||
849 | static struct task_struct *pick_next_task_fair(struct rq *rq) | 871 | static struct task_struct *pick_next_task_fair(struct rq *rq) |
@@ -1045,8 +1067,9 @@ static void task_new_fair(struct rq *rq, struct task_struct *p) | |||
1045 | update_curr(cfs_rq); | 1067 | update_curr(cfs_rq); |
1046 | place_entity(cfs_rq, se, 1); | 1068 | place_entity(cfs_rq, se, 1); |
1047 | 1069 | ||
1070 | /* 'curr' will be NULL if the child belongs to a different group */ | ||
1048 | if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) && | 1071 | if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) && |
1049 | curr->vruntime < se->vruntime) { | 1072 | curr && curr->vruntime < se->vruntime) { |
1050 | /* | 1073 | /* |
1051 | * Upon rescheduling, sched_class::put_prev_task() will place | 1074 | * Upon rescheduling, sched_class::put_prev_task() will place |
1052 | * 'current' within the tree based on its new key value. | 1075 | * 'current' within the tree based on its new key value. |
@@ -1054,7 +1077,6 @@ static void task_new_fair(struct rq *rq, struct task_struct *p) | |||
1054 | swap(curr->vruntime, se->vruntime); | 1077 | swap(curr->vruntime, se->vruntime); |
1055 | } | 1078 | } |
1056 | 1079 | ||
1057 | se->peer_preempt = 0; | ||
1058 | enqueue_task_fair(rq, p, 0); | 1080 | enqueue_task_fair(rq, p, 0); |
1059 | resched_task(rq->curr); | 1081 | resched_task(rq->curr); |
1060 | } | 1082 | } |
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index ef1a7df80ea2..630178e53bb6 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h | |||
@@ -127,7 +127,7 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta) | |||
127 | # define schedstat_set(var, val) do { } while (0) | 127 | # define schedstat_set(var, val) do { } while (0) |
128 | #endif | 128 | #endif |
129 | 129 | ||
130 | #ifdef CONFIG_SCHEDSTATS | 130 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
131 | /* | 131 | /* |
132 | * Called when a process is dequeued from the active array and given | 132 | * Called when a process is dequeued from the active array and given |
133 | * the cpu. We should note that with the exception of interactive | 133 | * the cpu. We should note that with the exception of interactive |
@@ -155,7 +155,7 @@ static inline void sched_info_dequeued(struct task_struct *t) | |||
155 | */ | 155 | */ |
156 | static void sched_info_arrive(struct task_struct *t) | 156 | static void sched_info_arrive(struct task_struct *t) |
157 | { | 157 | { |
158 | unsigned long long now = sched_clock(), delta = 0; | 158 | unsigned long long now = task_rq(t)->clock, delta = 0; |
159 | 159 | ||
160 | if (t->sched_info.last_queued) | 160 | if (t->sched_info.last_queued) |
161 | delta = now - t->sched_info.last_queued; | 161 | delta = now - t->sched_info.last_queued; |
@@ -186,7 +186,7 @@ static inline void sched_info_queued(struct task_struct *t) | |||
186 | { | 186 | { |
187 | if (unlikely(sched_info_on())) | 187 | if (unlikely(sched_info_on())) |
188 | if (!t->sched_info.last_queued) | 188 | if (!t->sched_info.last_queued) |
189 | t->sched_info.last_queued = sched_clock(); | 189 | t->sched_info.last_queued = task_rq(t)->clock; |
190 | } | 190 | } |
191 | 191 | ||
192 | /* | 192 | /* |
@@ -195,7 +195,8 @@ static inline void sched_info_queued(struct task_struct *t) | |||
195 | */ | 195 | */ |
196 | static inline void sched_info_depart(struct task_struct *t) | 196 | static inline void sched_info_depart(struct task_struct *t) |
197 | { | 197 | { |
198 | unsigned long long delta = sched_clock() - t->sched_info.last_arrival; | 198 | unsigned long long delta = task_rq(t)->clock - |
199 | t->sched_info.last_arrival; | ||
199 | 200 | ||
200 | t->sched_info.cpu_time += delta; | 201 | t->sched_info.cpu_time += delta; |
201 | rq_sched_info_depart(task_rq(t), delta); | 202 | rq_sched_info_depart(task_rq(t), delta); |
@@ -231,5 +232,5 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next) | |||
231 | #else | 232 | #else |
232 | #define sched_info_queued(t) do { } while (0) | 233 | #define sched_info_queued(t) do { } while (0) |
233 | #define sched_info_switch(t, next) do { } while (0) | 234 | #define sched_info_switch(t, next) do { } while (0) |
234 | #endif /* CONFIG_SCHEDSTATS */ | 235 | #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */ |
235 | 236 | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3b4efbe26445..3a1744fed2b6 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -226,20 +226,23 @@ static struct ctl_table root_table[] = { | |||
226 | 226 | ||
227 | #ifdef CONFIG_SCHED_DEBUG | 227 | #ifdef CONFIG_SCHED_DEBUG |
228 | static unsigned long min_sched_granularity_ns = 100000; /* 100 usecs */ | 228 | static unsigned long min_sched_granularity_ns = 100000; /* 100 usecs */ |
229 | static unsigned long max_sched_granularity_ns = 1000000000; /* 1 second */ | 229 | static unsigned long max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ |
230 | static unsigned long min_wakeup_granularity_ns; /* 0 usecs */ | 230 | static unsigned long min_wakeup_granularity_ns; /* 0 usecs */ |
231 | static unsigned long max_wakeup_granularity_ns = 1000000000; /* 1 second */ | 231 | static unsigned long max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ |
232 | #endif | 232 | #endif |
233 | 233 | ||
234 | static struct ctl_table kern_table[] = { | 234 | static struct ctl_table kern_table[] = { |
235 | #ifdef CONFIG_SCHED_DEBUG | 235 | #ifdef CONFIG_SCHED_DEBUG |
236 | { | 236 | { |
237 | .ctl_name = CTL_UNNUMBERED, | 237 | .ctl_name = CTL_UNNUMBERED, |
238 | .procname = "sched_nr_latency", | 238 | .procname = "sched_min_granularity_ns", |
239 | .data = &sysctl_sched_nr_latency, | 239 | .data = &sysctl_sched_min_granularity, |
240 | .maxlen = sizeof(unsigned int), | 240 | .maxlen = sizeof(unsigned int), |
241 | .mode = 0644, | 241 | .mode = 0644, |
242 | .proc_handler = &proc_dointvec, | 242 | .proc_handler = &sched_nr_latency_handler, |
243 | .strategy = &sysctl_intvec, | ||
244 | .extra1 = &min_sched_granularity_ns, | ||
245 | .extra2 = &max_sched_granularity_ns, | ||
243 | }, | 246 | }, |
244 | { | 247 | { |
245 | .ctl_name = CTL_UNNUMBERED, | 248 | .ctl_name = CTL_UNNUMBERED, |
@@ -247,7 +250,7 @@ static struct ctl_table kern_table[] = { | |||
247 | .data = &sysctl_sched_latency, | 250 | .data = &sysctl_sched_latency, |
248 | .maxlen = sizeof(unsigned int), | 251 | .maxlen = sizeof(unsigned int), |
249 | .mode = 0644, | 252 | .mode = 0644, |
250 | .proc_handler = &proc_dointvec_minmax, | 253 | .proc_handler = &sched_nr_latency_handler, |
251 | .strategy = &sysctl_intvec, | 254 | .strategy = &sysctl_intvec, |
252 | .extra1 = &min_sched_granularity_ns, | 255 | .extra1 = &min_sched_granularity_ns, |
253 | .extra2 = &max_sched_granularity_ns, | 256 | .extra2 = &max_sched_granularity_ns, |
@@ -298,6 +301,14 @@ static struct ctl_table kern_table[] = { | |||
298 | .mode = 0644, | 301 | .mode = 0644, |
299 | .proc_handler = &proc_dointvec, | 302 | .proc_handler = &proc_dointvec, |
300 | }, | 303 | }, |
304 | { | ||
305 | .ctl_name = CTL_UNNUMBERED, | ||
306 | .procname = "sched_nr_migrate", | ||
307 | .data = &sysctl_sched_nr_migrate, | ||
308 | .maxlen = sizeof(unsigned int), | ||
309 | .mode = 644, | ||
310 | .proc_handler = &proc_dointvec, | ||
311 | }, | ||
301 | #endif | 312 | #endif |
302 | { | 313 | { |
303 | .ctl_name = CTL_UNNUMBERED, | 314 | .ctl_name = CTL_UNNUMBERED, |
diff --git a/kernel/timer.c b/kernel/timer.c index 00e44e2afd67..a05817c021d6 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -817,6 +817,19 @@ unsigned long next_timer_interrupt(void) | |||
817 | 817 | ||
818 | #endif | 818 | #endif |
819 | 819 | ||
820 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | ||
821 | void account_process_tick(struct task_struct *p, int user_tick) | ||
822 | { | ||
823 | if (user_tick) { | ||
824 | account_user_time(p, jiffies_to_cputime(1)); | ||
825 | account_user_time_scaled(p, jiffies_to_cputime(1)); | ||
826 | } else { | ||
827 | account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); | ||
828 | account_system_time_scaled(p, jiffies_to_cputime(1)); | ||
829 | } | ||
830 | } | ||
831 | #endif | ||
832 | |||
820 | /* | 833 | /* |
821 | * Called from the timer interrupt handler to charge one tick to the current | 834 | * Called from the timer interrupt handler to charge one tick to the current |
822 | * process. user_tick is 1 if the tick is user time, 0 for system. | 835 | * process. user_tick is 1 if the tick is user time, 0 for system. |
@@ -827,13 +840,7 @@ void update_process_times(int user_tick) | |||
827 | int cpu = smp_processor_id(); | 840 | int cpu = smp_processor_id(); |
828 | 841 | ||
829 | /* Note: this timer irq context must be accounted for as well. */ | 842 | /* Note: this timer irq context must be accounted for as well. */ |
830 | if (user_tick) { | 843 | account_process_tick(p, user_tick); |
831 | account_user_time(p, jiffies_to_cputime(1)); | ||
832 | account_user_time_scaled(p, jiffies_to_cputime(1)); | ||
833 | } else { | ||
834 | account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); | ||
835 | account_system_time_scaled(p, jiffies_to_cputime(1)); | ||
836 | } | ||
837 | run_local_timers(); | 844 | run_local_timers(); |
838 | if (rcu_pending(cpu)) | 845 | if (rcu_pending(cpu)) |
839 | rcu_check_callbacks(cpu, user_tick); | 846 | rcu_check_callbacks(cpu, user_tick); |