From bd3bff9e20f454b242d979ec2f9a4dca0d5fa06f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:41 +0200 Subject: sched: add latency tracer callbacks to the scheduler add 3 lightweight callbacks to the tracer backend. zero impact if tracing is turned off. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5395a6176f4b..717cab8a0c83 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2117,6 +2117,32 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) } #endif +#ifdef CONFIG_CONTEXT_SWITCH_TRACER +extern void +ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next); +#else +static inline void +ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) +{ +} +#endif + +#ifdef CONFIG_SCHED_TRACER +extern void +ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr); +extern void +ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr); +#else +static inline void +ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) +{ +} +static inline void +ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr) +{ +} +#endif + extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); extern long sched_getaffinity(pid_t pid, cpumask_t *mask); -- cgit v1.2.2 From 7c731e0a495e25e79dc1e9e68772a67a55721a65 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:41 +0200 Subject: ftrace: make the task state char-string visible to all The tracer wants to be able to convert the state number into a user visible character. This patch pulls that conversion string out the scheduler into the header. This way if it were to ever change, other parts of the kernel will know. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 717cab8a0c83..6e26f1fdbfe2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2237,6 +2237,8 @@ static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) } #endif /* CONFIG_MM_OWNER */ +#define TASK_STATE_TO_CHAR_STR "RSDTtZX" + #endif /* __KERNEL__ */ #endif -- cgit v1.2.2 From 8ac0fca4ccb355ce50471d7aa3f10f5900b28b95 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:51 +0200 Subject: ftrace: sched tracer fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6e26f1fdbfe2..05744f9cb096 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2130,17 +2130,11 @@ ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) #ifdef CONFIG_SCHED_TRACER extern void ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr); -extern void -ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr); #else static inline void ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) { } -static inline void -ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr) -{ -} #endif extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); -- cgit v1.2.2 From 4e65551905fb0300ae7e667cbaa41ee2e3f29a13 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: sched tracer, trace full rbtree Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 05744f9cb096..652d380ae563 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2119,20 +2119,34 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) #ifdef CONFIG_CONTEXT_SWITCH_TRACER extern void -ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next); +ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next); +extern void +ftrace_wake_up_task(void *rq, struct task_struct *wakee, + struct task_struct *curr); +extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); +extern void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3); #else static inline void -ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) +ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next) +{ +} +static inline void +sched_trace_special(unsigned long p1, unsigned long p2, unsigned long p3) +{ +} +static inline void +ftrace_wake_up_task(void *rq, struct task_struct *wakee, + struct task_struct *curr) +{ +} +static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) { } -#endif - -#ifdef CONFIG_SCHED_TRACER -extern void -ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr); -#else static inline void -ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3) { } #endif -- cgit v1.2.2 From 017730c11241e26577673eb9d957cfc66172ea91 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: fix wakeups Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 652d380ae563..a3970b563757 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -246,6 +246,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern void init_idle_bootup_task(struct task_struct *idle); +extern int runqueue_is_locked(void); + extern cpumask_t nohz_cpu_mask; #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) extern int select_nohz_load_balancer(int cpu); -- cgit v1.2.2 From 1a3c3034336320554a3342572dae98d69e054fc7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: fix __trace_special() Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index a3970b563757..5b186bed54bc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2119,6 +2119,18 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) } #endif +#ifdef CONFIG_TRACING +extern void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3); +#else +static inline void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ +} +#endif + #ifdef CONFIG_CONTEXT_SWITCH_TRACER extern void ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next); @@ -2126,9 +2138,6 @@ extern void ftrace_wake_up_task(void *rq, struct task_struct *wakee, struct task_struct *curr); extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); -extern void -__trace_special(void *__tr, void *__data, - unsigned long arg1, unsigned long arg2, unsigned long arg3); #else static inline void ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next) @@ -2146,11 +2155,6 @@ ftrace_wake_up_task(void *rq, struct task_struct *wakee, static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) { } -static inline void -__trace_special(void *__tr, void *__data, - unsigned long arg1, unsigned long arg2, unsigned long arg3) -{ -} #endif extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); -- cgit v1.2.2 From 88a4216c3ec4281fc7e6725cc3a3ccd01fb1aa14 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:53 +0200 Subject: ftrace: sched special Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5b186bed54bc..360ca99033d2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2138,6 +2138,8 @@ extern void ftrace_wake_up_task(void *rq, struct task_struct *wakee, struct task_struct *curr); extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); +extern void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); #else static inline void ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next) @@ -2155,6 +2157,10 @@ ftrace_wake_up_task(void *rq, struct task_struct *wakee, static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) { } +static inline void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ +} #endif extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); -- cgit v1.2.2 From 5b82a1b08a00b2adca3d9dd9777efff40b7aaaa1 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 12 May 2008 21:21:10 +0200 Subject: Port ftrace to markers Porting ftrace to the marker infrastructure. Don't need to chain to the wakeup tracer from the sched tracer, because markers support multiple probes connected. Signed-off-by: Mathieu Desnoyers CC: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 360ca99033d2..c0b1c69b55ce 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2131,38 +2131,6 @@ __trace_special(void *__tr, void *__data, } #endif -#ifdef CONFIG_CONTEXT_SWITCH_TRACER -extern void -ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next); -extern void -ftrace_wake_up_task(void *rq, struct task_struct *wakee, - struct task_struct *curr); -extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); -extern void -ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); -#else -static inline void -ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next) -{ -} -static inline void -sched_trace_special(unsigned long p1, unsigned long p2, unsigned long p3) -{ -} -static inline void -ftrace_wake_up_task(void *rq, struct task_struct *wakee, - struct task_struct *curr) -{ -} -static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) -{ -} -static inline void -ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) -{ -} -#endif - extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); extern long sched_getaffinity(pid_t pid, cpumask_t *mask); -- cgit v1.2.2 From 9c44bc03fff44ff04237a7d92e35304a0e50c331 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:21:04 +0200 Subject: softlockup: allow panic on lockup allow users to configure the softlockup detector to generate a panic instead of a warning message. high-availability systems might opt for this strict method (combined with panic_timeout= boot option/sysctl), instead of generating softlockup warnings ad infinitum. also, automated tests work better if the system reboots reliably (into a safe kernel) in case of a lockup. The full spectrum of configurability is supported: boot option, sysctl option and Kconfig option. it's default-disabled. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5395a6176f4b..71f5972dc48e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -294,7 +294,8 @@ extern void softlockup_tick(void); extern void spawn_softlockup_task(void); extern void touch_softlockup_watchdog(void); extern void touch_all_softlockup_watchdogs(void); -extern unsigned long softlockup_thresh; +extern unsigned int softlockup_panic; +extern unsigned long softlockup_thresh; extern unsigned long sysctl_hung_task_check_count; extern unsigned long sysctl_hung_task_timeout_secs; extern unsigned long sysctl_hung_task_warnings; -- cgit v1.2.2 From 9383d9679056e6cc4e7ff70f31da945a268238f4 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Mon, 12 May 2008 21:21:14 +0200 Subject: softlockup: fix softlockup_thresh unaligned access and disable detection at runtime Fix unaligned access errors when setting softlockup_thresh on 64 bit platforms. Allow softlockup detection to be disabled by setting softlockup_thresh <= 0. Detect that boot time softlockup detection has been disabled earlier in softlockup_tick. Signed-off-by: Dimitri Sivanich Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 71f5972dc48e..ea26221644e2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -295,10 +295,10 @@ extern void spawn_softlockup_task(void); extern void touch_softlockup_watchdog(void); extern void touch_all_softlockup_watchdogs(void); extern unsigned int softlockup_panic; -extern unsigned long softlockup_thresh; extern unsigned long sysctl_hung_task_check_count; extern unsigned long sysctl_hung_task_timeout_secs; extern unsigned long sysctl_hung_task_warnings; +extern int softlockup_thresh; #else static inline void softlockup_tick(void) { -- cgit v1.2.2 From 554ec22f075d46e4363520a407d2b7eeb5dfdd43 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:21:03 +0200 Subject: namespacecheck: more sched.c fixes [ Stephen Rothwell : build fix ] Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index ae0be3c62375..dc36c3aea018 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -134,7 +134,6 @@ extern unsigned long nr_running(void); extern unsigned long nr_uninterruptible(void); extern unsigned long nr_active(void); extern unsigned long nr_iowait(void); -extern unsigned long weighted_cpuload(const int cpu); struct seq_file; struct cfs_rq; @@ -823,23 +822,6 @@ extern int arch_reinit_sched_domains(void); #endif /* CONFIG_SMP */ -/* - * A runqueue laden with a single nice 0 task scores a weighted_cpuload of - * SCHED_LOAD_SCALE. This function returns 1 if any cpu is laden with a - * task of nice 0 or enough lower priority tasks to bring up the - * weighted_cpuload - */ -static inline int above_background_load(void) -{ - unsigned long cpu; - - for_each_online_cpu(cpu) { - if (weighted_cpuload(cpu) >= SCHED_LOAD_SCALE) - return 1; - } - return 0; -} - struct io_context; /* See blkdev.h */ #define NGROUPS_SMALL 32 #define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t))) -- cgit v1.2.2 From c7aceaba042702538b23cf4e0de1b2891ad8e671 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Thu, 15 May 2008 12:09:15 +0100 Subject: sched: reorder task_struct to reduce padding on 64bit builds This patch removes 24 bytes of padding and allows 1 extra object per slab on my fedora based config. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index dc36c3aea018..ea2857b99596 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1021,6 +1021,7 @@ struct task_struct { #endif int prio, static_prio, normal_prio; + unsigned int rt_priority; const struct sched_class *sched_class; struct sched_entity se; struct sched_rt_entity rt; @@ -1104,7 +1105,6 @@ struct task_struct { int __user *set_child_tid; /* CLONE_CHILD_SETTID */ int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ - unsigned int rt_priority; cputime_t utime, stime, utimescaled, stimescaled; cputime_t gtime; cputime_t prev_utime, prev_stime; @@ -1123,12 +1123,12 @@ struct task_struct { gid_t gid,egid,sgid,fsgid; struct group_info *group_info; kernel_cap_t cap_effective, cap_inheritable, cap_permitted, cap_bset; - unsigned securebits; struct user_struct *user; + unsigned securebits; #ifdef CONFIG_KEYS + unsigned char jit_keyring; /* default keyring to attach requested keys to */ struct key *request_key_auth; /* assumed request_key authority */ struct key *thread_keyring; /* keyring private to this thread */ - unsigned char jit_keyring; /* default keyring to attach requested keys to */ #endif char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock @@ -1215,8 +1215,8 @@ struct task_struct { # define MAX_LOCK_DEPTH 48UL u64 curr_chain_key; int lockdep_depth; - struct held_lock held_locks[MAX_LOCK_DEPTH]; unsigned int lockdep_recursion; + struct held_lock held_locks[MAX_LOCK_DEPTH]; #endif /* journalling filesystem info */ @@ -1244,10 +1244,6 @@ struct task_struct { u64 acct_vm_mem1; /* accumulated virtual memory usage */ cputime_t acct_stimexpd;/* stime since last update */ #endif -#ifdef CONFIG_NUMA - struct mempolicy *mempolicy; - short il_next; -#endif #ifdef CONFIG_CPUSETS nodemask_t mems_allowed; int cpuset_mems_generation; @@ -1266,6 +1262,10 @@ struct task_struct { #endif struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; +#endif +#ifdef CONFIG_NUMA + struct mempolicy *mempolicy; + short il_next; #endif atomic_t fs_excl; /* holding fs exclusive resources */ struct rcu_head rcu; -- cgit v1.2.2 From 1f11eb6a8bc92536d9e93ead48fa3ffbd1478571 Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Wed, 4 Jun 2008 15:04:05 -0400 Subject: sched: fix cpupri hotplug support The RT folks over at RedHat found an issue w.r.t. hotplug support which was traced to problems with the cpupri infrastructure in the scheduler: https://bugzilla.redhat.com/show_bug.cgi?id=449676 This bug affects 23-rt12+, 24-rtX, 25-rtX, and sched-devel. This patch applies to 25.4-rt4, though it should trivially apply to most cpupri enabled kernels mentioned above. It turned out that the issue was that offline cpus could get inadvertently registered with cpupri so that they were erroneously selected during migration decisions. The end result would be an OOPS as the offline cpu had tasks routed to it. This patch generalizes the old join/leave domain interface into an online/offline interface, and adjusts the root-domain/hotplug code to utilize it. I was able to easily reproduce the issue prior to this patch, and am no longer able to reproduce it after this patch. I can offline cpus indefinately and everything seems to be in working order. Thanks to Arnaldo (acme), Thomas, and Peter for doing the legwork to point me in the right direction. Also thank you to Peter for reviewing the early iterations of this patch. Signed-off-by: Gregory Haskins Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Arnaldo Carvalho de Melo Cc: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index ea2857b99596..d25acf600a32 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -903,8 +903,8 @@ struct sched_class { void (*set_cpus_allowed)(struct task_struct *p, const cpumask_t *newmask); - void (*join_domain)(struct rq *rq); - void (*leave_domain)(struct rq *rq); + void (*rq_online)(struct rq *rq); + void (*rq_offline)(struct rq *rq); void (*switched_from) (struct rq *this_rq, struct task_struct *task, int running); -- cgit v1.2.2 From 9985b0bab332289f14837eff3c6e0bcc658b58f7 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 5 Jun 2008 12:57:11 -0700 Subject: sched: prevent bound kthreads from changing cpus_allowed Kthreads that have called kthread_bind() are bound to specific cpus, so other tasks should not be able to change their cpus_allowed from under them. Otherwise, it is possible to move kthreads, such as the migration or software watchdog threads, so they are not allowed access to the cpu they work on. Cc: Peter Zijlstra Cc: Paul Menage Cc: Paul Jackson Signed-off-by: David Rientjes Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index d25acf600a32..2db1485f865d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1486,6 +1486,7 @@ static inline void put_task_struct(struct task_struct *t) #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ +#define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */ #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ -- cgit v1.2.2 From 961ccddd59d627b89bd3dc284b6517833bbdf25d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 23 Jun 2008 13:55:38 +1000 Subject: sched: add new API sched_setscheduler_nocheck: add a flag to control access checks Hidehiro Kawai noticed that sched_setscheduler() can fail in stop_machine: it calls sched_setscheduler() from insmod, which can have CAP_SYS_MODULE without CAP_SYS_NICE. Two cases could have failed, so are changed to sched_setscheduler_nocheck: kernel/softirq.c:cpu_callback() - CPU hotplug callback kernel/stop_machine.c:__stop_machine_run() - Called from various places, including modprobe() Signed-off-by: Rusty Russell Cc: Jeremy Fitzhardinge Cc: Hidehiro Kawai Cc: Andrew Morton Cc: linux-mm@kvack.org Cc: sugita Cc: Satoshi OSHIMA Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index c5d3f847ca8d..fe3b9b5d7390 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1655,6 +1655,8 @@ extern int can_nice(const struct task_struct *p, const int nice); extern int task_curr(const struct task_struct *p); extern int idle_cpu(int cpu); extern int sched_setscheduler(struct task_struct *, int, struct sched_param *); +extern int sched_setscheduler_nocheck(struct task_struct *, int, + struct sched_param *); extern struct task_struct *idle_task(int cpu); extern struct task_struct *curr_task(int cpu); extern void set_curr_task(int cpu, struct task_struct *p); -- cgit v1.2.2 From c09595f63bb1909c5dc4dca288f4fe818561b5f3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 27 Jun 2008 13:41:14 +0200 Subject: sched: revert revert of: fair-group: SMP-nice for group scheduling Try again.. Initial commit: 18d95a2832c1392a2d63227a7a6d433cb9f2037e Revert: 6363ca57c76b7b83639ca8c83fc285fa26a7880e Signed-off-by: Peter Zijlstra Cc: Srivatsa Vaddagiri Cc: Mike Galbraith Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index eaf821072dbd..97a58b622ee1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -765,6 +765,7 @@ struct sched_domain { struct sched_domain *child; /* bottom domain must be null terminated */ struct sched_group *groups; /* the balancing groups of the domain */ cpumask_t span; /* span of all CPUs in this domain */ + int first_cpu; /* cache of the first cpu in this domain */ unsigned long min_interval; /* Minimum balance interval ms */ unsigned long max_interval; /* Maximum balance interval ms */ unsigned int busy_factor; /* less balancing by factor if busy */ -- cgit v1.2.2 From b6a86c746f5b708012809958462234d19e9c8177 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 27 Jun 2008 13:41:18 +0200 Subject: sched: fix sched_domain aggregation Keeping the aggregate on the first cpu of the sched domain has two problems: - it could collide between different sched domains on different cpus - it could slow things down because of the remote accesses Signed-off-by: Peter Zijlstra Cc: Srivatsa Vaddagiri Cc: Mike Galbraith Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 97a58b622ee1..eaf821072dbd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -765,7 +765,6 @@ struct sched_domain { struct sched_domain *child; /* bottom domain must be null terminated */ struct sched_group *groups; /* the balancing groups of the domain */ cpumask_t span; /* span of all CPUs in this domain */ - int first_cpu; /* cache of the first cpu in this domain */ unsigned long min_interval; /* Minimum balance interval ms */ unsigned long max_interval; /* Maximum balance interval ms */ unsigned int busy_factor; /* less balancing by factor if busy */ -- cgit v1.2.2 From 2398f2c6d34b43025f274fc42eaca34d23ec2320 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 27 Jun 2008 13:41:35 +0200 Subject: sched: update shares on wakeup We found that the affine wakeup code needs rather accurate load figures to be effective. The trouble is that updating the load figures is fairly expensive with group scheduling. Therefore ratelimit the updating. Signed-off-by: Peter Zijlstra Cc: Srivatsa Vaddagiri Cc: Mike Galbraith Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index eaf821072dbd..835b6c6fcc56 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -783,6 +783,8 @@ struct sched_domain { unsigned int balance_interval; /* initialise to 1. units in ms. */ unsigned int nr_balance_failed; /* initialise to 0 */ + u64 last_update; + #ifdef CONFIG_SCHEDSTATS /* load_balance() stats */ unsigned int lb_count[CPU_MAX_IDLE_TYPES]; @@ -1605,6 +1607,7 @@ extern unsigned int sysctl_sched_child_runs_first; extern unsigned int sysctl_sched_features; extern unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_nr_migrate; +extern unsigned int sysctl_sched_shares_ratelimit; int sched_nr_latency_handler(struct ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, -- cgit v1.2.2 From af52a90a14cdaa54ecbfb6e6982abb13466a4b56 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 7 Jul 2008 14:16:52 -0400 Subject: sched_clock: stop maximum check on NO HZ Working with ftrace I would get large jumps of 11 millisecs or more with the clock tracer. This killed the latencing timings of ftrace and also caused the irqoff self tests to fail. What was happening is with NO_HZ the idle would stop the jiffy counter and before the jiffy counter was updated the sched_clock would have a bad delta jiffies to compare with the gtod with the maximum. The jiffies would stop and the last sched_tick would record the last gtod. On wakeup, the sched clock update would compare the gtod + delta jiffies (which would be zero) and compare it to the TSC. The TSC would have correctly (with a stable TSC) moved forward several jiffies. But because the jiffies has not been updated yet the clock would be prevented from moving forward because it would appear that the TSC jumped too far ahead. The clock would then virtually stop, until the jiffies are updated. Then the next sched clock update would see that the clock was very much behind since the delta jiffies is now correct. This would then jump the clock forward by several jiffies. This caused ftrace to report several milliseconds of interrupts off latency at every resume from NO_HZ idle. This patch adds hooks into the nohz code to disable the checking of the maximum clock update when nohz is in effect. It resumes the max check when nohz has updated the jiffies again. Signed-off-by: Steven Rostedt Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/sched.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index c5d3f847ca8d..33a8f42041fa 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1573,13 +1573,28 @@ static inline void sched_clock_idle_sleep_event(void) static inline void sched_clock_idle_wakeup_event(u64 delta_ns) { } -#else + +#ifdef CONFIG_NO_HZ +static inline void sched_clock_tick_stop(int cpu) +{ +} + +static inline void sched_clock_tick_start(int cpu) +{ +} +#endif + +#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ extern void sched_clock_init(void); extern u64 sched_clock_cpu(int cpu); extern void sched_clock_tick(void); extern void sched_clock_idle_sleep_event(void); extern void sched_clock_idle_wakeup_event(u64 delta_ns); +#ifdef CONFIG_NO_HZ +extern void sched_clock_tick_stop(int cpu); +extern void sched_clock_tick_start(int cpu); #endif +#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ /* * For kernel-internal use: high-speed (but slightly incorrect) per-cpu -- cgit v1.2.2 From ebb12db51f6c13b30752fcf506baad4c617b153c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 11 Jun 2008 22:04:29 +0200 Subject: Freezer: Introduce PF_FREEZER_NOSIG The freezer currently attempts to distinguish kernel threads from user space tasks by checking if their mm pointer is unset and it does not send fake signals to kernel threads. However, there are kernel threads, mostly related to networking, that behave like user space tasks and may want to be sent a fake signal to be frozen. Introduce the new process flag PF_FREEZER_NOSIG that will be set by default for all kernel threads and make the freezer only send fake signals to the tasks having PF_FREEZER_NOSIG unset. Provide the set_freezable_with_signal() function to be called by the kernel threads that want to be sent a fake signal for freezing. This patch should not change the freezer's observable behavior. Signed-off-by: Rafael J. Wysocki Signed-off-by: Andi Kleen Acked-by: Pavel Machek Signed-off-by: Len Brown --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 21349173d148..ba2f859c6e4f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1494,6 +1494,7 @@ static inline void put_task_struct(struct task_struct *t) #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ +#define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals to it */ /* * Only the _current_ task can read/write to tsk->flags, but other -- cgit v1.2.2 From f470021adb9190819c03d6d8c5c860a17480aa6d Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Mon, 24 Mar 2008 18:36:23 -0700 Subject: ptrace children revamp ptrace no longer fiddles with the children/sibling links, and the old ptrace_children list is gone. Now ptrace, whether of one's own children or another's via PTRACE_ATTACH, just uses the new ptraced list instead. There should be no user-visible difference that matters. The only change is the order in which do_wait() sees multiple stopped children and stopped ptrace attachees. Since wait_task_stopped() was changed earlier so it no longer reorders the children list, we already know this won't cause any new problems. Signed-off-by: Roland McGrath --- include/linux/sched.h | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index ba2f859c6e4f..1941d8b5cf11 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1062,12 +1062,6 @@ struct task_struct { #endif struct list_head tasks; - /* - * ptrace_list/ptrace_children forms the list of my children - * that were stolen by a ptracer. - */ - struct list_head ptrace_children; - struct list_head ptrace_list; struct mm_struct *mm, *active_mm; @@ -1089,18 +1083,25 @@ struct task_struct { /* * pointers to (original) parent process, youngest child, younger sibling, * older sibling, respectively. (p->father can be replaced with - * p->parent->pid) + * p->real_parent->pid) */ - struct task_struct *real_parent; /* real parent process (when being debugged) */ - struct task_struct *parent; /* parent process */ + struct task_struct *real_parent; /* real parent process */ + struct task_struct *parent; /* recipient of SIGCHLD, wait4() reports */ /* - * children/sibling forms the list of my children plus the - * tasks I'm ptracing. + * children/sibling forms the list of my natural children */ struct list_head children; /* list of my children */ struct list_head sibling; /* linkage in my parent's children list */ struct task_struct *group_leader; /* threadgroup leader */ + /* + * ptraced is the list of tasks this task is using ptrace on. + * This includes both natural children and PTRACE_ATTACH targets. + * p->ptrace_entry is p's link on the p->parent->ptraced list. + */ + struct list_head ptraced; + struct list_head ptrace_entry; + /* PID/PID hash table linkage. */ struct pid_link pids[PIDTYPE_MAX]; struct list_head thread_group; @@ -1876,9 +1877,6 @@ extern void wait_task_inactive(struct task_struct * p); #define wait_task_inactive(p) do { } while (0) #endif -#define remove_parent(p) list_del_init(&(p)->sibling) -#define add_parent(p) list_add_tail(&(p)->sibling,&(p)->parent->children) - #define next_task(p) list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks) #define for_each_process(p) \ -- cgit v1.2.2 From 1b427c153a08fdbc092c2bdbf845b92fda58d857 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 18 Jul 2008 14:01:39 +0200 Subject: sched: fix build error, provide partition_sched_domains() unconditionally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit provide an empty partition_sched_domains() definition for the UP case: include/linux/cpuset.h: In function ‘rebuild_sched_domains': include/linux/cpuset.h:163: error: implicit declaration of function ‘partition_sched_domains' Signed-off-by: Ingo Molnar --- include/linux/sched.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 1941d8b5cf11..26da921530fe 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -824,7 +824,16 @@ extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, struct sched_domain_attr *dattr_new); extern int arch_reinit_sched_domains(void); -#endif /* CONFIG_SMP */ +#else /* CONFIG_SMP */ + +struct sched_domain_attr; + +static inline void +partition_sched_domains(int ndoms_new, cpumask_t *doms_new, + struct sched_domain_attr *dattr_new) +{ +} +#endif /* !CONFIG_SMP */ struct io_context; /* See blkdev.h */ #define NGROUPS_SMALL 32 -- cgit v1.2.2 From 8b05c7e6e159d2f33c9275281b8b909a89eb7c5d Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 23 Jul 2008 21:26:53 -0700 Subject: add a helper function to test if an object is on the stack lib/debugobjects.c has a function to test if an object is on the stack. The block layer and ide needs it (they need to avoid DMA from/to stack buffers). This patch moves the function to include/linux/sched.h so that everyone can use it. lib/debugobjects.c uses current->stack but this patch uses a task_stack_page() accessor, which is a preferable way to access the stack. Signed-off-by: FUJITA Tomonori Cc: Christoph Lameter Cc: Andy Whitcroft Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index dc7e592c473a..6aca4a16e377 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1983,6 +1983,13 @@ static inline unsigned long *end_of_stack(struct task_struct *p) #endif +static inline int object_is_on_stack(void *obj) +{ + void *stack = task_stack_page(current); + + return (obj >= stack) && (obj < (stack + THREAD_SIZE)); +} + extern void thread_info_cache_init(void); /* set thread flags in other task's structures -- cgit v1.2.2 From 364d3c13c17f45da6d638011078d4c4d3070d719 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Jul 2008 01:47:36 -0700 Subject: ptrace: give more respect to SIGKILL ptrace_stop() has some complicated checks to prevent the scheduling in the TASK_TRACED state with the pending SIGKILL, but these checks are racy, and they depend on arch_ptrace_stop_needed(). This patch assumes that the traced task should die asap if it was killed by SIGKILL, in that case schedule()->signal_pending_state() has no reason to ignore the TASK_WAKEKILL part of TASK_TRACED, and we can kill this nasty special case. Note: do_exit()->ptrace_notify() is special, the killed task can already dequeue SIGKILL at this point. Another indication that fatal_signal_pending() is not exactly right. Signed-off-by: Oleg Nesterov Cc: Ingo Molnar Cc: Matthew Wilcox Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6aca4a16e377..79e749dbf81e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2054,9 +2054,6 @@ static inline int signal_pending_state(long state, struct task_struct *p) if (!signal_pending(p)) return 0; - if (state & (__TASK_STOPPED | __TASK_TRACED)) - return 0; - return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); } -- cgit v1.2.2 From 7b34e4283c685f5cc6ba6d30e939906eee0d4bcf Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Jul 2008 01:47:37 -0700 Subject: introduce PF_KTHREAD flag Introduce the new PF_KTHREAD flag to mark the kernel threads. It is set by INIT_TASK() and copied to the forked childs (we could set it in kthreadd() along with PF_NOFREEZE instead). daemonize() was changed as well. In that case testing of PF_KTHREAD is racy, but daemonize() is hopeless anyway. This flag is cleared in do_execve(), before search_binary_handler(). Probably not the best place, we can do this in exec_mmap() or in start_thread(), or clear it along with PF_FORKNOEXEC. But I think this doesn't matter in practice, and if do_execve() fails kthread should die soon. Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 79e749dbf81e..eec64a4adb9d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1483,6 +1483,7 @@ static inline void put_task_struct(struct task_struct *t) #define PF_EXITING 0x00000004 /* getting shut down */ #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ +#define PF_KTHREAD 0x00000020 /* I am a kernel thread */ #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ #define PF_DUMPCORE 0x00000200 /* dumped core */ -- cgit v1.2.2 From 246bb0b1deb29726990620d8b5e55ca29f331362 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Jul 2008 01:47:38 -0700 Subject: kill PF_BORROWED_MM in favour of PF_KTHREAD Kill PF_BORROWED_MM. Change use_mm/unuse_mm to not play with ->flags, and do s/PF_BORROWED_MM/PF_KTHREAD/ for a couple of other users. No functional changes yet. But this allows us to do further fixes/cleanups. oom_kill/ptrace/etc often check "p->mm != NULL" to filter out the kthreads, this is wrong because of use_mm(). The problem with PF_BORROWED_MM is that we need task_lock() to avoid races. With this patch we can check PF_KTHREAD directly, or use a simple lockless helper: /* The result must not be dereferenced !!! */ struct mm_struct *__get_task_mm(struct task_struct *tsk) { if (tsk->flags & PF_KTHREAD) return NULL; return tsk->mm; } Note also ecard_task(). It runs with ->mm != NULL, but it's the kernel thread without PF_BORROWED_MM. Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index eec64a4adb9d..0560999eb1db 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1483,7 +1483,6 @@ static inline void put_task_struct(struct task_struct *t) #define PF_EXITING 0x00000004 /* getting shut down */ #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ -#define PF_KTHREAD 0x00000020 /* I am a kernel thread */ #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ #define PF_DUMPCORE 0x00000200 /* dumped core */ @@ -1497,7 +1496,7 @@ static inline void put_task_struct(struct task_struct *t) #define PF_KSWAPD 0x00040000 /* I am kswapd */ #define PF_SWAPOFF 0x00080000 /* I am in swapoff */ #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ -#define PF_BORROWED_MM 0x00200000 /* I am a kthread doing use_mm */ +#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ -- cgit v1.2.2 From 19b0cfcca41dd772065671ad0584e1cea0f3fd13 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 25 Jul 2008 01:48:35 -0700 Subject: pidns: remove now unused kill_proc function This function operated on a pid_t to kill a task, which is no longer valid in a containerized system. It has finally lost all its users and we can safely remove it from the tree. Signed-off-by: Pavel Emelyanov Cc: Oleg Nesterov Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0560999eb1db..134cb5cb506c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1800,7 +1800,6 @@ extern void force_sig(int, struct task_struct *); extern void force_sig_specific(int, struct task_struct *); extern int send_sig(int, struct task_struct *, int); extern void zap_other_threads(struct task_struct *p); -extern int kill_proc(pid_t, int, int); extern struct sigqueue *sigqueue_alloc(void); extern void sigqueue_free(struct sigqueue *); extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); -- cgit v1.2.2 From e49859e71e0318b564de1546bdc30fab738f9deb Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 25 Jul 2008 01:48:36 -0700 Subject: pidns: remove now unused find_pid function. This one had the only users so far - the kill_proc, which is removed, so drop this (invalid in namespaced world) call too. And of course - erase all references on it from comments. Signed-off-by: Pavel Emelyanov Cc: Oleg Nesterov Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 134cb5cb506c..182da1550fad 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1718,7 +1718,7 @@ extern struct pid_namespace init_pid_ns; * find_task_by_pid(): * finds a task by its global pid * - * see also find_pid() etc in include/linux/pid.h + * see also find_vpid() etc in include/linux/pid.h */ extern struct task_struct *find_task_by_pid_type_ns(int type, int pid, -- cgit v1.2.2 From dbda0de52618d13d1b927c7ba7bb839cfddc4e8c Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 25 Jul 2008 01:48:37 -0700 Subject: pidns: remove find_task_by_pid, unused for a long time It seems to me that it was a mistake marking this function as deprecated and scheduling it for removal, rather than resolutely removing it after the last caller's death. Anyway - better late, then never. Signed-off-by: Pavel Emelyanov Cc: Oleg Nesterov Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 182da1550fad..354ef478a80d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1715,8 +1715,6 @@ extern struct pid_namespace init_pid_ns; * finds a task by its pid in the specified namespace * find_task_by_vpid(): * finds a task by its virtual pid - * find_task_by_pid(): - * finds a task by its global pid * * see also find_vpid() etc in include/linux/pid.h */ @@ -1724,10 +1722,6 @@ extern struct pid_namespace init_pid_ns; extern struct task_struct *find_task_by_pid_type_ns(int type, int pid, struct pid_namespace *ns); -static inline struct task_struct *__deprecated find_task_by_pid(pid_t nr) -{ - return find_task_by_pid_type_ns(PIDTYPE_PID, nr, &init_pid_ns); -} extern struct task_struct *find_task_by_vpid(pid_t nr); extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns); -- cgit v1.2.2 From 49b5cf34727a6c1be1568ab28e89a2d9a6bf51e0 Mon Sep 17 00:00:00 2001 From: Jonathan Lim Date: Fri, 25 Jul 2008 01:48:40 -0700 Subject: accounting: account for user time when updating memory integrals Adapt acct_update_integrals() to include user time when calculating the time difference. The units of acct_rss_mem1 and acct_vm_mem1 are also changed from pages-jiffies to pages-usecs to avoid calling jiffies_to_usecs() in xacct_add_tsk() which might overflow. Signed-off-by: Jonathan Lim Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 354ef478a80d..af780f299c7c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1257,7 +1257,7 @@ struct task_struct { #if defined(CONFIG_TASK_XACCT) u64 acct_rss_mem1; /* accumulated rss usage */ u64 acct_vm_mem1; /* accumulated virtual memory usage */ - cputime_t acct_stimexpd;/* stime since last update */ + cputime_t acct_timexpd; /* stime + utime since last update */ #endif #ifdef CONFIG_CPUSETS nodemask_t mems_allowed; -- cgit v1.2.2 From 297c5d92634c809cef23d73e7b2556f2528ff7e2 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Fri, 25 Jul 2008 01:48:49 -0700 Subject: task IO accounting: provide distinct tgid/tid I/O statistics Report per-thread I/O statistics in /proc/pid/task/tid/io and aggregate parent I/O statistics in /proc/pid/io. This approach follows the same model used to account per-process and per-thread CPU times. As a practial application, this allows for example to quickly find the top I/O consumer when a process spawns many child threads that perform the actual I/O work, because the aggregated I/O statistics can always be found in /proc/pid/io. [ Oleg Nesterov points out that we should check that the task is still alive before we iterate over the threads, but also says that we can do that fixup on top of this later. - Linus ] Acked-by: Balbir Singh Signed-off-by: Andrea Righi Cc: Matt Heaton Cc: Shailabh Nagar Acked-by-with-comments: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index af780f299c7c..d22ffe06d0eb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -506,6 +506,10 @@ struct signal_struct { unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; unsigned long inblock, oublock, cinblock, coublock; +#ifdef CONFIG_TASK_XACCT + u64 rchar, wchar, syscr, syscw; +#endif + struct task_io_accounting ioac; /* * Cumulative ns of scheduled CPU time for dead threads in the -- cgit v1.2.2 From 873b47717732c2f33a4b14de02571a4295a02f0c Mon Sep 17 00:00:00 2001 From: Keika Kobayashi Date: Fri, 25 Jul 2008 01:48:52 -0700 Subject: per-task-delay-accounting: add memory reclaim delay Sometimes, application responses become bad under heavy memory load. Applications take a bit time to reclaim memory. The statistics, how long memory reclaim takes, will be useful to measure memory usage. This patch adds accounting memory reclaim to per-task-delay-accounting for accounting the time of do_try_to_free_pages(). - When System is under low memory load, memory reclaim may not occur. $ free total used free shared buffers cached Mem: 8197800 1577300 6620500 0 4808 1516724 -/+ buffers/cache: 55768 8142032 Swap: 16386292 0 16386292 $ vmstat 1 procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu---- r b swpd free buff cache si so bi bo in cs us sy id wa 0 0 0 5069748 10612 3014060 0 0 0 0 3 26 0 0 100 0 0 0 0 5069748 10612 3014060 0 0 0 0 4 22 0 0 100 0 0 0 0 5069748 10612 3014060 0 0 0 0 3 18 0 0 100 0 Measure the time of tar command. $ ls -s test.dat 1501472 test.dat $ time tar cvf test.tar test.dat real 0m13.388s user 0m0.116s sys 0m5.304s $ ./delayget -d -p CPU count real total virtual total delay total 428 5528345500 5477116080 62749891 IO count delay total 338 8078977189 SWAP count delay total 0 0 RECLAIM count delay total 0 0 - When system is under heavy memory load memory reclaim may occur. $ vmstat 1 procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu---- r b swpd free buff cache si so bi bo in cs us sy id wa 0 0 7159032 49724 1812 3012 0 0 0 0 3 24 0 0 100 0 0 0 7159032 49724 1812 3012 0 0 0 0 4 24 0 0 100 0 0 0 7159032 49848 1812 3012 0 0 0 0 3 22 0 0 100 0 In this case, one process uses more 8G memory by execution of malloc() and memset(). $ time tar cvf test.tar test.dat real 1m38.563s <- increased by 85 sec user 0m0.140s sys 0m7.060s $ ./delayget -d -p CPU count real total virtual total delay total 9021 7140446250 7315277975 923201824 IO count delay total 8965 90466349669 SWAP count delay total 3 21036367 RECLAIM count delay total 740 61011951153 In the later case, the value of RECLAIM is increasing. So, taskstats can show how much memory reclaim influences TAT. Signed-off-by: Keika Kobayashi Acked-by: Balbir Singh Acked-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index d22ffe06d0eb..42036ffe6b00 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -672,6 +672,10 @@ struct task_delay_info { /* io operations performed */ u32 swapin_count; /* total count of the number of swapin block */ /* io operations performed */ + + struct timespec freepages_start, freepages_end; + u64 freepages_delay; /* wait for memory reclaim */ + u32 freepages_count; /* total count of memory reclaim */ }; #endif /* CONFIG_TASK_DELAY_ACCT */ -- cgit v1.2.2 From 16d69265b930f7e2fa9eea381715696f780718f4 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 25 Jul 2008 19:44:36 -0700 Subject: uninline arch_pick_mmap_layout() Fix this, on avr32: include/linux/utsname.h:35, from init/main.c:20: include/linux/sched.h: In function 'arch_pick_mmap_layout': include/linux/sched.h:2149: error: implicit declaration of function 'PAGE_ALIGN' Reported-by: Adrian Bunk Cc: Haavard Skinnemoen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 42036ffe6b00..3260a5c42b91 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2139,16 +2139,7 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) #endif /* CONFIG_SMP */ -#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT extern void arch_pick_mmap_layout(struct mm_struct *mm); -#else -static inline void arch_pick_mmap_layout(struct mm_struct *mm) -{ - mm->mmap_base = TASK_UNMAPPED_BASE; - mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; -} -#endif #ifdef CONFIG_TRACING extern void -- cgit v1.2.2 From 7babe8db99d305340cf4828ce1f5a1481d5622ef Mon Sep 17 00:00:00 2001 From: Eduard - Gabriel Munteanu Date: Fri, 25 Jul 2008 19:45:11 -0700 Subject: Full conversion to early_initcall() interface, remove old interface A previous patch added the early_initcall(), to allow a cleaner hooking of pre-SMP initcalls. Now we remove the older interface, converting all existing users to the new one. [akpm@linux-foundation.org: cleanups] [akpm@linux-foundation.org: build fix] [kosaki.motohiro@jp.fujitsu.com: warning fix] [kosaki.motohiro@jp.fujitsu.com: warning fix] Signed-off-by: Eduard - Gabriel Munteanu Cc: Tom Zanussi Signed-off-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3260a5c42b91..adb8077dc463 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -292,7 +292,6 @@ extern void sched_show_task(struct task_struct *p); #ifdef CONFIG_DETECT_SOFTLOCKUP extern void softlockup_tick(void); -extern void spawn_softlockup_task(void); extern void touch_softlockup_watchdog(void); extern void touch_all_softlockup_watchdogs(void); extern unsigned int softlockup_panic; @@ -2222,14 +2221,6 @@ static inline void inc_syscw(struct task_struct *tsk) } #endif -#ifdef CONFIG_SMP -void migration_init(void); -#else -static inline void migration_init(void) -{ -} -#endif - #ifndef TASK_SIZE_OF #define TASK_SIZE_OF(tsk) TASK_SIZE #endif -- cgit v1.2.2 From 2b2a1ff64afbadac842bbc58c5166962cf4f7664 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 25 Jul 2008 19:45:54 -0700 Subject: tracehook: death This moves the ptrace logic in task death (exit_notify) into tracehook.h inlines. Some code is rearranged slightly to make things nicer. There is no change, only cleanup. There is one hook called with the tasklist_lock write-locked, as ptrace needs. There is also a new hook called after exit_state changes and without locks. This is a better place for tracing work to be in the future, since it doesn't delay the whole system with locking. Signed-off-by: Roland McGrath Cc: Oleg Nesterov Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index adb8077dc463..a95d84d0da95 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1796,7 +1796,7 @@ extern int kill_pid_info_as_uid(int, struct siginfo *, struct pid *, uid_t, uid_ extern int kill_pgrp(struct pid *pid, int sig, int priv); extern int kill_pid(struct pid *pid, int sig, int priv); extern int kill_proc_info(int, struct siginfo *, pid_t); -extern void do_notify_parent(struct task_struct *, int); +extern int do_notify_parent(struct task_struct *, int); extern void force_sig(int, struct task_struct *); extern void force_sig_specific(int, struct task_struct *); extern int send_sig(int, struct task_struct *, int); -- cgit v1.2.2 From 85ba2d862e521375a8ee01526c5c46b1f24bb4af Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 25 Jul 2008 19:45:58 -0700 Subject: tracehook: wait_task_inactive This extends wait_task_inactive() with a new argument so it can be used in a "soft" mode where it will check for the task changing state unexpectedly and back off. There is no change to existing callers. This lays the groundwork to allow robust, noninvasive tracing that can try to sample a blocked thread but back off safely if it wakes up. Signed-off-by: Roland McGrath Cc: Oleg Nesterov Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index a95d84d0da95..f59318a0099b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1882,9 +1882,13 @@ extern void set_task_comm(struct task_struct *tsk, char *from); extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP -extern void wait_task_inactive(struct task_struct * p); +extern unsigned long wait_task_inactive(struct task_struct *, long match_state); #else -#define wait_task_inactive(p) do { } while (0) +static inline unsigned long wait_task_inactive(struct task_struct *p, + long match_state) +{ + return 1; +} #endif #define next_task(p) list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks) -- cgit v1.2.2 From 5995477ab7f3522c497c9c4a1c55373e9d655574 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Sun, 27 Jul 2008 17:29:15 +0200 Subject: task IO accounting: improve code readability Put all i/o statistics in struct proc_io_accounting and use inline functions to initialize and increment statistics, removing a lot of single variable assignments. This also reduces the kernel size as following (with CONFIG_TASK_XACCT=y and CONFIG_TASK_IO_ACCOUNTING=y). text data bss dec hex filename 11651 0 0 11651 2d83 kernel/exit.o.before 11619 0 0 11619 2d63 kernel/exit.o.after 10886 132 136 11154 2b92 kernel/fork.o.before 10758 132 136 11026 2b12 kernel/fork.o.after 3082029 807968 4818600 8708597 84e1f5 vmlinux.o.before 3081869 807968 4818600 8708437 84e155 vmlinux.o.after Signed-off-by: Andrea Righi Acked-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- include/linux/sched.h | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index f59318a0099b..034c1ca6b332 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -505,10 +505,7 @@ struct signal_struct { unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; unsigned long inblock, oublock, cinblock, coublock; -#ifdef CONFIG_TASK_XACCT - u64 rchar, wchar, syscr, syscw; -#endif - struct task_io_accounting ioac; + struct proc_io_accounting ioac; /* * Cumulative ns of scheduled CPU time for dead threads in the @@ -1256,11 +1253,7 @@ struct task_struct { unsigned long ptrace_message; siginfo_t *last_siginfo; /* For ptrace use. */ -#ifdef CONFIG_TASK_XACCT -/* i/o counters(bytes read/written, #syscalls */ - u64 rchar, wchar, syscr, syscw; -#endif - struct task_io_accounting ioac; + struct proc_io_accounting ioac; #if defined(CONFIG_TASK_XACCT) u64 acct_rss_mem1; /* accumulated rss usage */ u64 acct_vm_mem1; /* accumulated virtual memory usage */ @@ -2190,22 +2183,22 @@ extern long sched_group_rt_period(struct task_group *tg); #ifdef CONFIG_TASK_XACCT static inline void add_rchar(struct task_struct *tsk, ssize_t amt) { - tsk->rchar += amt; + tsk->ioac.chr.rchar += amt; } static inline void add_wchar(struct task_struct *tsk, ssize_t amt) { - tsk->wchar += amt; + tsk->ioac.chr.wchar += amt; } static inline void inc_syscr(struct task_struct *tsk) { - tsk->syscr++; + tsk->ioac.chr.syscr++; } static inline void inc_syscw(struct task_struct *tsk) { - tsk->syscw++; + tsk->ioac.chr.syscw++; } #else static inline void add_rchar(struct task_struct *tsk, ssize_t amt) -- cgit v1.2.2 From 940389b8afad6495211614c13eb91ef7001773ec Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Mon, 28 Jul 2008 00:48:12 +0200 Subject: task IO accounting: move all IO statistics in struct task_io_accounting Simplify the code of include/linux/task_io_accounting.h. It is also more reasonable to have all the task i/o-related statistics in a single struct (task_io_accounting). Signed-off-by: Andrea Righi Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- include/linux/sched.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 034c1ca6b332..5270d449ff9d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -505,7 +505,7 @@ struct signal_struct { unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; unsigned long inblock, oublock, cinblock, coublock; - struct proc_io_accounting ioac; + struct task_io_accounting ioac; /* * Cumulative ns of scheduled CPU time for dead threads in the @@ -1253,7 +1253,7 @@ struct task_struct { unsigned long ptrace_message; siginfo_t *last_siginfo; /* For ptrace use. */ - struct proc_io_accounting ioac; + struct task_io_accounting ioac; #if defined(CONFIG_TASK_XACCT) u64 acct_rss_mem1; /* accumulated rss usage */ u64 acct_vm_mem1; /* accumulated virtual memory usage */ @@ -2183,22 +2183,22 @@ extern long sched_group_rt_period(struct task_group *tg); #ifdef CONFIG_TASK_XACCT static inline void add_rchar(struct task_struct *tsk, ssize_t amt) { - tsk->ioac.chr.rchar += amt; + tsk->ioac.rchar += amt; } static inline void add_wchar(struct task_struct *tsk, ssize_t amt) { - tsk->ioac.chr.wchar += amt; + tsk->ioac.wchar += amt; } static inline void inc_syscr(struct task_struct *tsk) { - tsk->ioac.chr.syscr++; + tsk->ioac.syscr++; } static inline void inc_syscw(struct task_struct *tsk) { - tsk->ioac.chr.syscw++; + tsk->ioac.syscw++; } #else static inline void add_rchar(struct task_struct *tsk, ssize_t amt) -- cgit v1.2.2 From e4e4e534faa3c2be4e165ce414f44b76ada7208c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 14 Apr 2008 08:50:02 +0200 Subject: sched clock: revert various sched_clock() changes Found an interactivity problem on a quad core test-system - simple CPU loops would occasionally delay the system un an unacceptable way. After much debugging with Peter Zijlstra it turned out that the problem is caused by the string of sched_clock() changes - they caused the CPU clock to jump backwards a bit - which confuses the scheduler arithmetics. (which is unsigned for performance reasons) So revert: # c300ba2: sched_clock: and multiplier for TSC to gtod drift # c0c8773: sched_clock: only update deltas with local reads. # af52a90: sched_clock: stop maximum check on NO HZ # f7cce27: sched_clock: widen the max and min time This solves the interactivity problems. Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Acked-by: Mike Galbraith --- include/linux/sched.h | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5270d449ff9d..ea436bc1a0e2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1572,28 +1572,13 @@ static inline void sched_clock_idle_sleep_event(void) static inline void sched_clock_idle_wakeup_event(u64 delta_ns) { } - -#ifdef CONFIG_NO_HZ -static inline void sched_clock_tick_stop(int cpu) -{ -} - -static inline void sched_clock_tick_start(int cpu) -{ -} -#endif - -#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ +#else extern void sched_clock_init(void); extern u64 sched_clock_cpu(int cpu); extern void sched_clock_tick(void); extern void sched_clock_idle_sleep_event(void); extern void sched_clock_idle_wakeup_event(u64 delta_ns); -#ifdef CONFIG_NO_HZ -extern void sched_clock_tick_stop(int cpu); -extern void sched_clock_tick_start(int cpu); #endif -#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ /* * For kernel-internal use: high-speed (but slightly incorrect) per-cpu -- cgit v1.2.2 From c1955a3d4762e7a9bf84035eb3c4886a900f0d15 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 11 Aug 2008 08:59:03 +0200 Subject: sched_clock: delay using sched_clock() Some arch's can't handle sched_clock() being called too early - delay this until sched_clock_init() has been called. Reported-by: Bill Gatliff Signed-off-by: Peter Zijlstra Tested-by: Nishanth Aravamudan CC: Russell King - ARM Linux Signed-off-by: Ingo Molnar --- include/linux/sched.h | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index ea436bc1a0e2..5850bfb968a8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1551,16 +1551,10 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) extern unsigned long long sched_clock(void); -#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK -static inline void sched_clock_init(void) -{ -} - -static inline u64 sched_clock_cpu(int cpu) -{ - return sched_clock(); -} +extern void sched_clock_init(void); +extern u64 sched_clock_cpu(int cpu); +#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK static inline void sched_clock_tick(void) { } @@ -1573,8 +1567,6 @@ static inline void sched_clock_idle_wakeup_event(u64 delta_ns) { } #else -extern void sched_clock_init(void); -extern u64 sched_clock_cpu(int cpu); extern void sched_clock_tick(void); extern void sched_clock_idle_sleep_event(void); extern void sched_clock_idle_wakeup_event(u64 delta_ns); -- cgit v1.2.2 From 9e2b2dc4133f65272a6d3c5dcb2ce63f8a87cae9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 13 Aug 2008 16:20:04 +0100 Subject: CRED: Introduce credential access wrappers The patches that are intended to introduce copy-on-write credentials for 2.6.28 require abstraction of access to some fields of the task structure, particularly for the case of one task accessing another's credentials where RCU will have to be observed. Introduced here are trivial no-op versions of the desired accessors for current and other tasks so that other subsystems can start to be converted over more easily. Wrappers are introduced into a new header (linux/cred.h) for UID/GID, EUID/EGID, SUID/SGID, FSUID/FSGID, cap_effective and current's subscribed user_struct. These wrappers are macros because the ordering between header files mitigates against making them inline functions. linux/cred.h is #included from linux/sched.h. Further, XFS is modified such that it no longer defines and uses parameterised versions of current_fs[ug]id(), thus getting rid of the namespace collision otherwise incurred. Signed-off-by: David Howells Signed-off-by: James Morris --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5850bfb968a8..cfb0d87b99fc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -87,6 +87,7 @@ struct sched_param { #include #include #include +#include #include -- cgit v1.2.2 From 07dd20e0324f4d3e33bde1944d4f7771a09c498c Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Fri, 1 Aug 2008 13:18:04 +0100 Subject: sched: reorder signal_struct to remove 8 bytes on 64 bit builds reorder structure to remove 8 bytes of padding on 64 bit builds Signed-off-by: Richard Kennedy Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index cfb0d87b99fc..5a8058e44f58 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -451,8 +451,8 @@ struct signal_struct { * - everyone except group_exit_task is stopped during signal delivery * of fatal signals, group_exit_task processes the signal. */ - struct task_struct *group_exit_task; int notify_count; + struct task_struct *group_exit_task; /* thread group stop support, overloads group_exit_code too */ int group_stop_count; -- cgit v1.2.2 From bee367ed066e26c14263d808136fba8eec3bd70a Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Fri, 1 Aug 2008 13:24:08 +0100 Subject: sched: reorder struct sched_rt_entity to remove padding on 64 bit builds remove 8 bytes of padding on 64 bit builds (also removes 8 bytes from task_struct) Signed-off-by: Richard Kennedy Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5a8058e44f58..08a87b5f29e1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1010,8 +1010,8 @@ struct sched_entity { struct sched_rt_entity { struct list_head run_list; - unsigned int time_slice; unsigned long timeout; + unsigned int time_slice; int nr_cpus_allowed; struct sched_rt_entity *back; -- cgit v1.2.2 From 49048622eae698e5c4ae61f7e71200f265ccc529 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Fri, 5 Sep 2008 18:12:23 +0200 Subject: sched: fix process time monotonicity Spencer reported a problem where utime and stime were going negative despite the fixes in commit b27f03d4bdc145a09fb7b0c0e004b29f1ee555fa. The suspected reason for the problem is that signal_struct maintains it's own utime and stime (of exited tasks), these are not updated using the new task_utime() routine, hence sig->utime can go backwards and cause the same problem to occur (sig->utime, adds tsk->utime and not task_utime()). This patch fixes the problem TODO: using max(task->prev_utime, derived utime) works for now, but a more generic solution is to implement cputime_max() and use the cputime_gt() function for comparison. Reported-by: spencer@bluehost.com Signed-off-by: Balbir Singh Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index cfb0d87b99fc..3d9120c5ad15 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1475,6 +1475,10 @@ static inline void put_task_struct(struct task_struct *t) __put_task_struct(t); } +extern cputime_t task_utime(struct task_struct *p); +extern cputime_t task_stime(struct task_struct *p); +extern cputime_t task_gtime(struct task_struct *p); + /* * Per process flags */ -- cgit v1.2.2 From f7d0b926ac8c8ec0c7a83ee69409bd2e6bb39f81 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 9 Sep 2008 15:43:22 -0700 Subject: mm: define USE_SPLIT_PTLOCKS rather than repeating expression Define USE_SPLIT_PTLOCKS as a constant expression rather than repeating "NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS" all over the place. Signed-off-by: Jeremy Fitzhardinge Acked-by: Hugh Dickins Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3d9120c5ad15..272c35309df2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -352,7 +352,7 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, extern void arch_unmap_area(struct mm_struct *, unsigned long); extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); -#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS +#if USE_SPLIT_PTLOCKS /* * The mm counters are not protected by its page_table_lock, * so must be incremented atomically. @@ -363,7 +363,7 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); #define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member) #define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member) -#else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ +#else /* !USE_SPLIT_PTLOCKS */ /* * The mm counters are protected by its page_table_lock, * so can be incremented directly. @@ -374,7 +374,7 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); #define inc_mm_counter(mm, member) (mm)->_##member++ #define dec_mm_counter(mm, member) (mm)->_##member-- -#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ +#endif /* !USE_SPLIT_PTLOCKS */ #define get_mm_rss(mm) \ (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss)) -- cgit v1.2.2 From 15afe09bf496ae10c989e1a375a6b5da7bd3e16e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 20 Sep 2008 23:38:02 +0200 Subject: sched: wakeup preempt when small overlap Lin Ming reported a 10% OLTP regression against 2.6.27-rc4. The difference seems to come from different preemption agressiveness, which affects the cache footprint of the workload and its effective cache trashing. Aggresively preempt a task if its avg overlap is very small, this should avoid the task going to sleep and find it still running when we schedule back to it - saving a wakeup. Reported-by: Lin Ming Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index b3b7a8f32477..d8e699b55858 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -897,7 +897,7 @@ struct sched_class { void (*yield_task) (struct rq *rq); int (*select_task_rq)(struct task_struct *p, int sync); - void (*check_preempt_curr) (struct rq *rq, struct task_struct *p); + void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int sync); struct task_struct * (*pick_next_task) (struct rq *rq); void (*put_prev_task) (struct rq *rq, struct task_struct *p); -- cgit v1.2.2 From a5d8c3483a6e19aca95ef6a2c5890e33bfa5b293 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Oct 2008 11:35:51 +0200 Subject: sched debug: add name to sched_domain sysctl entries add /proc/sys/kernel/sched_domain/cpu0/domain0/name, to make it easier to see which specific scheduler domain remained at that entry. Since we process the scheduler domain tree and simplify it, it's not always immediately clear during debugging which domain came from where. depends on CONFIG_SCHED_DEBUG=y. Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index d8e699b55858..5d0819ee442a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -824,6 +824,9 @@ struct sched_domain { unsigned int ttwu_move_affine; unsigned int ttwu_move_balance; #endif +#ifdef CONFIG_SCHED_DEBUG + char *name; +#endif }; extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, -- cgit v1.2.2