From 0793a61d4df8daeac6492dbf8d2f3e5713caae5e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 4 Dec 2008 20:12:29 +0100
Subject: performance counters: core code

Implement the core kernel bits of the Performance Counters subsystem.

The Linux Performance Counter subsystem provides an abstraction of
performance counter hardware capabilities. It provides per task and per
CPU counters, and it provides event capabilities on top of those.

Performance counters are accessed via special file descriptors.
There's one file descriptor per virtual counter used.

The special file descriptor is opened via the perf_counter_open()
system call:

 int
 perf_counter_open(u32 hw_event_type,
                   u32 hw_event_period,
                   u32 record_type,
                   pid_t pid,
                   int cpu);

The syscall returns the new fd. The fd can be used via the normal
VFS system calls: read() can be used to read the counter, fcntl()
can be used to set the blocking mode, etc.

Multiple counters can be kept open at a time, and the counters
can be poll()ed.

See more details in Documentation/perf-counters.txt.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 55e30d114477..4c530278391b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -71,6 +71,7 @@ struct sched_param {
 #include <linux/fs_struct.h>
 #include <linux/compiler.h>
 #include <linux/completion.h>
+#include <linux/perf_counter.h>
 #include <linux/pid.h>
 #include <linux/percpu.h>
 #include <linux/topology.h>
@@ -1326,6 +1327,7 @@ struct task_struct {
 	struct list_head pi_state_list;
 	struct futex_pi_state *pi_state_cache;
 #endif
+	struct perf_counter_context perf_counter_ctx;
 #ifdef CONFIG_NUMA
 	struct mempolicy *mempolicy;
 	short il_next;
@@ -2285,6 +2287,13 @@ static inline void inc_syscw(struct task_struct *tsk)
 #define TASK_SIZE_OF(tsk)	TASK_SIZE
 #endif
 
+/*
+ * Call the function if the target task is executing on a CPU right now:
+ */
+extern void task_oncpu_function_call(struct task_struct *p,
+				     void (*func) (void *info), void *info);
+
+
 #ifdef CONFIG_MM_OWNER
 extern void mm_update_next_owner(struct mm_struct *mm);
 extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p);
-- 
cgit v1.2.2


From 6c594c21fcb02c662f11c97be4d7d2b73060a205 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 14 Dec 2008 12:34:15 +0100
Subject: perfcounters: add task migrations counter

Impact: add new feature, new sw counter

Add a counter that counts the number of cross-CPU migrations a
task is suffering.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4c530278391b..2e15be8fc792 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1014,6 +1014,8 @@ struct sched_entity {
 	u64			last_wakeup;
 	u64			avg_overlap;
 
+	u64			nr_migrations;
+
 #ifdef CONFIG_SCHEDSTATS
 	u64			wait_start;
 	u64			wait_max;
@@ -1029,7 +1031,6 @@ struct sched_entity {
 	u64			exec_max;
 	u64			slice_max;
 
-	u64			nr_migrations;
 	u64			nr_migrations_cold;
 	u64			nr_failed_migrations_affine;
 	u64			nr_failed_migrations_running;
-- 
cgit v1.2.2


From 23a185ca8abbeef64b6ffc33059b1d630e43ec10 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 9 Feb 2009 22:42:47 +1100
Subject: perf_counters: make software counters work as per-cpu counters

Impact: kernel crash fix

Yanmin Zhang reported that using a PERF_COUNT_TASK_CLOCK software
counter as a per-cpu counter would reliably crash the system, because
it calls __task_delta_exec with a null pointer.  The page fault,
context switch and cpu migration counters also won't function
correctly as per-cpu counters since they reference the current task.

This fixes the problem by redirecting the task_clock counter to the
cpu_clock counter when used as a per-cpu counter, and by implementing
per-cpu page fault, context switch and cpu migration counters.

Along the way, this:

- Initializes counter->ctx earlier, in perf_counter_alloc, so that
  sw_perf_counter_init can use it
- Adds code to kernel/sched.c to count task migrations into each
  cpu, in rq->nr_migrations_in
- Exports the per-cpu context switch and task migration counts
  via new functions added to kernel/sched.c
- Makes sure that if sw_perf_counter_init fails, we don't try to
  initialize the counter as a hardware counter.  Since the user has
  passed a negative, non-raw event type, they clearly don't intend
  for it to be interpreted as a hardware event.

Reported-by: "Zhang Yanmin" <yanmin_zhang@linux.intel.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b85b10abf770..1e5f70062a9c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -137,6 +137,8 @@ extern unsigned long nr_running(void);
 extern unsigned long nr_uninterruptible(void);
 extern unsigned long nr_active(void);
 extern unsigned long nr_iowait(void);
+extern u64 cpu_nr_switches(int cpu);
+extern u64 cpu_nr_migrations(int cpu);
 
 struct seq_file;
 struct cfs_rq;
-- 
cgit v1.2.2


From 4a0deca657f3dbb8a707b5dc8f173beec01e7ed2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 19 Mar 2009 20:26:12 +0100
Subject: perf_counter: generic context switch event

Impact: cleanup

Use the generic software events for context switches.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Orig-LKML-Reference: <20090319194233.283522645@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 75b2fc5306d8..7ed41f7c5ace 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -138,7 +138,6 @@ extern unsigned long nr_running(void);
 extern unsigned long nr_uninterruptible(void);
 extern unsigned long nr_active(void);
 extern unsigned long nr_iowait(void);
-extern u64 cpu_nr_switches(int cpu);
 extern u64 cpu_nr_migrations(int cpu);
 
 extern unsigned long get_parent_ip(unsigned long addr);
-- 
cgit v1.2.2


From a26b89f05d194413c7238e0bea071054f6b5d3c8 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 3 Apr 2009 16:43:34 +0200
Subject: sched, hw-branch-tracer: add wait_task_context_switch() function to
 sched.h

Add a function to wait until some other task has been
switched out at least once.

This differs from wait_task_inactive() subtly, in that the
latter will wait until the task has left the CPU.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: markus.t.metzger@gmail.com
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: oleg@redhat.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144549.794157000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b94f3541f67b..a5b9a83065fa 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1993,8 +1993,10 @@ extern void set_task_comm(struct task_struct *tsk, char *from);
 extern char *get_task_comm(char *to, struct task_struct *tsk);
 
 #ifdef CONFIG_SMP
+extern void wait_task_context_switch(struct task_struct *p);
 extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
 #else
+static inline void wait_task_context_switch(struct task_struct *p) {}
 static inline unsigned long wait_task_inactive(struct task_struct *p,
 					       long match_state)
 {
-- 
cgit v1.2.2


From e2b371f00a6f529f6362654239bdec8dcd510760 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 3 Apr 2009 16:43:35 +0200
Subject: mm, x86, ptrace, bts: defer branch trace stopping

When a ptraced task is unlinked, we need to stop branch tracing for
that task.

Since the unlink is called with interrupts disabled, and we need
interrupts enabled to stop branch tracing, we defer the work.

Collect all branch tracing related stuff in a branch tracing context.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144550.712401000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index a5b9a83065fa..52b8cd049c2e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -96,8 +96,8 @@ struct exec_domain;
 struct futex_pi_state;
 struct robust_list_head;
 struct bio;
-struct bts_tracer;
 struct fs_struct;
+struct bts_context;
 
 /*
  * List of flags we want to share for kernel threads,
@@ -1210,12 +1210,7 @@ struct task_struct {
 	 * This is the tracer handle for the ptrace BTS extension.
 	 * This field actually belongs to the ptracer task.
 	 */
-	struct bts_tracer *bts;
-	/*
-	 * The buffer to hold the BTS data.
-	 */
-	void *bts_buffer;
-	size_t bts_size;
+	struct bts_context *bts;
 #endif /* CONFIG_X86_PTRACE_BTS */
 
 	/* PID/PID hash table linkage. */
-- 
cgit v1.2.2


From 0f4814065ff8c24ca8bfd75c9b73502be152c287 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 3 Apr 2009 16:43:48 +0200
Subject: x86, ptrace: add bts context unconditionally

Add the ptrace bts context field to task_struct unconditionally.

Initialize the field directly in copy_process().
Remove all the unneeded functionality used to initialize that field.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: oleg@redhat.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144603.292754000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 52b8cd049c2e..451186a22ef5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1205,13 +1205,11 @@ struct task_struct {
 	struct list_head ptraced;
 	struct list_head ptrace_entry;
 
-#ifdef CONFIG_X86_PTRACE_BTS
 	/*
 	 * This is the tracer handle for the ptrace BTS extension.
 	 * This field actually belongs to the ptracer task.
 	 */
 	struct bts_context *bts;
-#endif /* CONFIG_X86_PTRACE_BTS */
 
 	/* PID/PID hash table linkage. */
 	struct pid_link pids[PIDTYPE_MAX];
-- 
cgit v1.2.2


From 05725f7eb4b8acb147c5fc7b91397b1f6bcab00d Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Tue, 14 Apr 2009 20:17:16 +0200
Subject: rculist: use list_entry_rcu in places where it's appropriate

Use previously introduced list_entry_rcu instead of an open-coded
list_entry + rcu_dereference combination.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: dipankar@in.ibm.com
LKML-Reference: <20090414181715.GA3634@psychotron.englab.brq.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b4c38bc8049c..886df41e7452 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -77,6 +77,7 @@ struct sched_param {
 #include <linux/proportions.h>
 #include <linux/seccomp.h>
 #include <linux/rcupdate.h>
+#include <linux/rculist.h>
 #include <linux/rtmutex.h>
 
 #include <linux/time.h>
@@ -2010,7 +2011,8 @@ static inline unsigned long wait_task_inactive(struct task_struct *p,
 }
 #endif
 
-#define next_task(p)	list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks)
+#define next_task(p) \
+	list_entry_rcu((p)->tasks.next, struct task_struct, tasks)
 
 #define for_each_process(p) \
 	for (p = &init_task ; (p = next_task(p)) != &init_task ; )
@@ -2049,8 +2051,8 @@ int same_thread_group(struct task_struct *p1, struct task_struct *p2)
 
 static inline struct task_struct *next_thread(const struct task_struct *p)
 {
-	return list_entry(rcu_dereference(p->thread_group.next),
-			  struct task_struct, thread_group);
+	return list_entry_rcu(p->thread_group.next,
+			      struct task_struct, thread_group);
 }
 
 static inline int thread_group_empty(struct task_struct *p)
-- 
cgit v1.2.2


From 261842b7c9099f56de2eb969c8ad65402d68e00e Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 16 Apr 2009 21:41:52 -0400
Subject: tracing: add same level recursion detection

The tracing infrastructure allows for recursion. That is, an interrupt
may interrupt the act of tracing an event, and that interrupt may very well
perform its own trace. This is a recursive trace, and is fine to do.

The problem arises when there is a bug, and the utility doing the trace
calls something that recurses back into the tracer. This recursion is not
caused by an external event like an interrupt, but by code that is not
expected to recurse. The result could be a lockup.

This patch adds a bitmask to the task structure that keeps track
of the trace recursion. To find the interrupt depth, the following
algorithm is used:

  level = hardirq_count() + softirq_count() + in_nmi;

Here, level will be the depth of interrupts and softirqs, and even handles
the nmi. Then the corresponding bit is set in the recursion bitmask.
If the bit was already set, we know we had a recursion at the same level
and we warn about it and fail the writing to the buffer.

After the data has been committed to the buffer, we clear the bit.
No atomics are needed. The only races are with interrupts and they reset
the bitmask before returning anyway.

[ Impact: detect same irq level trace recursion ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/sched.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b4c38bc8049c..7ede5e490913 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1428,7 +1428,9 @@ struct task_struct {
 #ifdef CONFIG_TRACING
 	/* state flags for use by tracers */
 	unsigned long trace;
-#endif
+	/* bitmask of trace recursion */
+	unsigned long trace_recursion;
+#endif /* CONFIG_TRACING */
 };
 
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
-- 
cgit v1.2.2


From 3bcac0263f0b45e67a64034ebcb69eb9abb742f4 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 29 Apr 2009 13:45:05 +0100
Subject: SELinux: Don't flush inherited SIGKILL during execve()

Don't flush inherited SIGKILL during execve() in SELinux's post cred commit
hook.  This isn't really a security problem: if the SIGKILL came before the
credentials were changed, then we were right to receive it at the time, and
should honour it; if it came after the creds were changed, then we definitely
should honour it; and in any case, all that will happen is that the process
will be scrapped before it ever returns to userspace.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1d19c025f9d2..d3b787c7aef3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1875,6 +1875,7 @@ extern void sched_dead(struct task_struct *p);
 
 extern void proc_caches_init(void);
 extern void flush_signals(struct task_struct *);
+extern void __flush_signals(struct task_struct *);
 extern void ignore_signals(struct task_struct *);
 extern void flush_signal_handlers(struct task_struct *, int force_default);
 extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info);
-- 
cgit v1.2.2


From 5e751e992f3fb08ba35e1ca8095ec8fbf9eda523 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 8 May 2009 13:55:22 +0100
Subject: CRED: Rename cred_exec_mutex to reflect that it's a guard against
 ptrace

Rename cred_exec_mutex to reflect that it's a guard against foreign
intervention on a process's credential state, such as is made by ptrace().  The
attachment of a debugger to a process affects execve()'s calculation of the new
credential state - _and_ also setprocattr()'s calculation of that state.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/sched.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3fa82b353c98..5932ace22400 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1247,7 +1247,9 @@ struct task_struct {
 					 * credentials (COW) */
 	const struct cred *cred;	/* effective (overridable) subjective task
 					 * credentials (COW) */
-	struct mutex cred_exec_mutex;	/* execve vs ptrace cred calculation mutex */
+	struct mutex cred_guard_mutex;	/* guard against foreign influences on
+					 * credential calculations
+					 * (notably. ptrace) */
 
 	char comm[TASK_COMM_LEN]; /* executable name excluding path
 				     - access with [gs]et_task_comm (which lock
-- 
cgit v1.2.2


From cd1bb94b4a0531e8211a3774f17de831f8285f76 Mon Sep 17 00:00:00 2001
From: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
Date: Thu, 16 Apr 2009 12:15:34 +0530
Subject: timers: /proc/sys sysctl hook to enable timer migration

* Arun R Bharadwaj <arun@linux.vnet.ibm.com> [2009-04-16 12:11:36]:

This patch creates the /proc/sys sysctl interface at
/proc/sys/kernel/timer_migration

Timer migration is enabled by default.

To disable timer migration, when CONFIG_SCHED_DEBUG = y,

echo 0 > /proc/sys/kernel/timer_migration

Signed-off-by: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b4c38bc8049c..618504010400 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1766,6 +1766,7 @@ extern unsigned int sysctl_sched_child_runs_first;
 extern unsigned int sysctl_sched_features;
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
+extern unsigned int sysctl_timer_migration;
 
 int sched_nr_latency_handler(struct ctl_table *table, int write,
 		struct file *file, void __user *buffer, size_t *length,
-- 
cgit v1.2.2


From eea08f32adb3f97553d49a4f79a119833036000a Mon Sep 17 00:00:00 2001
From: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
Date: Thu, 16 Apr 2009 12:16:41 +0530
Subject: timers: Logic to move non pinned timers

* Arun R Bharadwaj <arun@linux.vnet.ibm.com> [2009-04-16 12:11:36]:

This patch migrates all non pinned timers and hrtimers to the current
idle load balancer, from all the idle CPUs. Timers firing on busy CPUs
are not migrated.

While migrating hrtimers, care should be taken to check if migrating
a hrtimer would result in a latency or not. So we compare the expiry of the
hrtimer with the next timer interrupt on the target cpu and migrate the
hrtimer only if it expires *after* the next interrupt on the target cpu.
So, added a clockevents_get_next_event() helper function to return the
next_event on the target cpu's clock_event_device.

[ tglx: cleanups and simplifications ]

Signed-off-by: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/sched.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 618504010400..311dec123974 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -257,6 +257,7 @@ extern void task_rq_unlock_wait(struct task_struct *p);
 extern cpumask_var_t nohz_cpu_mask;
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
 extern int select_nohz_load_balancer(int cpu);
+extern int get_nohz_load_balancer(void);
 #else
 static inline int select_nohz_load_balancer(int cpu)
 {
@@ -1772,6 +1773,17 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
 		struct file *file, void __user *buffer, size_t *length,
 		loff_t *ppos);
 #endif
+#ifdef CONFIG_SCHED_DEBUG
+static inline unsigned int get_sysctl_timer_migration(void)
+{
+	return sysctl_timer_migration;
+}
+#else
+static inline unsigned int get_sysctl_timer_migration(void)
+{
+	return 1;
+}
+#endif
 extern unsigned int sysctl_sched_rt_period;
 extern int sysctl_sched_rt_runtime;
 
-- 
cgit v1.2.2


From 789f90fcf6b0b54e655740e9396c954378542c79 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 15 May 2009 15:19:27 +0200
Subject: perf_counter: per user mlock gift

Instead of a per-process mlock gift for perf-counters, use a
per-user gift so that there is less of a DoS potential.

[ Impact: allow less worst-case unprivileged memory consumption ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <20090515132018.496182835@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d1857580a132..ff59d1231519 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -674,6 +674,10 @@ struct user_struct {
 	struct work_struct work;
 #endif
 #endif
+
+#ifdef CONFIG_PERF_COUNTERS
+	atomic_long_t locked_vm;
+#endif
 };
 
 extern int uids_sysfs_init(void);
-- 
cgit v1.2.2


From dce48a84adf1806676319f6f480e30a6daa012f9 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 11 Apr 2009 10:43:41 +0200
Subject: sched, timers: move calc_load() to scheduler

Dimitri Sivanich noticed that xtime_lock is held write locked across
calc_load() which iterates over all online CPUs. That can cause long
latencies for xtime_lock readers on large SMP systems.

The load average calculation is a rough estimate anyway so there is
no real need to protect the readers vs. the update. It's not a problem
when the avenrun array is updated while a reader copies the values.

Instead of iterating over all online CPUs let the scheduler_tick code
update the number of active tasks shortly before the avenrun update
happens. The avenrun update itself is handled by the CPU which calls
do_timer().

[ Impact: reduce xtime_lock write locked section ]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b4c38bc8049c..6eb4892efe45 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -135,8 +135,8 @@ DECLARE_PER_CPU(unsigned long, process_counts);
 extern int nr_processes(void);
 extern unsigned long nr_running(void);
 extern unsigned long nr_uninterruptible(void);
-extern unsigned long nr_active(void);
 extern unsigned long nr_iowait(void);
+extern void calc_global_load(void);
 
 extern unsigned long get_parent_ip(unsigned long addr);
 
-- 
cgit v1.2.2


From 2d02494f5a90f2e4b3c4c6acc85ec94674cdc431 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 2 May 2009 20:08:52 +0200
Subject: sched, timers: cleanup avenrun users

avenrun is a rough estimate so we don't have to worry about
consistency of the three avenrun values. Remove the xtime lock
dependency and provide a function to scale the values. Cleanup the
users.

[ Impact: cleanup ]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6eb4892efe45..de7b3b217772 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -116,6 +116,7 @@ struct fs_struct;
  *    11 bit fractions.
  */
 extern unsigned long avenrun[];		/* Load averages */
+extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift);
 
 #define FSHIFT		11		/* nr of bits of precision */
 #define FIXED_1		(1<<FSHIFT)	/* 1.0 as fixed-point */
-- 
cgit v1.2.2


From 690cc3ffe33ac4a2857583c22d4c6244ae11684d Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 13 May 2009 16:55:10 +0000
Subject: syscall: Implement a convinience function restart_syscall

Currently when we have a signal pending we have the functionality
to restart the current system call.  There are other cases
such as nasty lock ordering issues where it makes sense to have
a simple fix that uses try lock and restarts the system call.
Buying time to figure out how to rework the locking strategy.

Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sched.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b4c38bc8049c..d853f6bb0baf 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2178,6 +2178,12 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
 	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
 }
 
+static inline int restart_syscall(void)
+{
+	set_tsk_thread_flag(current, TIF_SIGPENDING);
+	return -ERESTARTNOINTR;
+}
+
 static inline int signal_pending(struct task_struct *p)
 {
 	return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
-- 
cgit v1.2.2


From 4200efd9acda4accf24640f1e77d24fdcdb524df Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 19 May 2009 09:22:19 +0200
Subject: sched: properly define the sched_group::cpumask and
 sched_domain::span fields

Properly document the variable-size structure tricks we are doing
wrt. struct sched_group and sched_domain, and use the field[0] GCC
extension instead of defining a vla array.

Don't use unions for this, as pointed out by Linus.

[ Impact: cleanup, un-confuse Sparse and LLVM ]

Reported-by: Jeff Garzik <jeff@garzik.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <alpine.LFD.2.01.0905180850110.3301@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index de7b3b217772..dbb1043e8656 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -839,7 +839,17 @@ struct sched_group {
 	 */
 	u32 reciprocal_cpu_power;
 
-	unsigned long cpumask[];
+	/*
+	 * The CPUs this group covers.
+	 *
+	 * NOTE: this field is variable length. (Allocated dynamically
+	 * by attaching extra space to the end of the structure,
+	 * depending on how many CPUs the kernel has booted up with)
+	 *
+	 * It is also be embedded into static data structures at build
+	 * time. (See 'struct static_sched_group' in kernel/sched.c)
+	 */
+	unsigned long cpumask[0];
 };
 
 static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
@@ -925,8 +935,17 @@ struct sched_domain {
 	char *name;
 #endif
 
-	/* span of all CPUs in this domain */
-	unsigned long span[];
+	/*
+	 * Span of all CPUs in this domain.
+	 *
+	 * NOTE: this field is variable length. (Allocated dynamically
+	 * by attaching extra space to the end of the structure,
+	 * depending on how many CPUs the kernel has booted up with)
+	 *
+	 * It is also be embedded into static data structures at build
+	 * time. (See 'struct static_sched_domain' in kernel/sched.c)
+	 */
+	unsigned long span[0];
 };
 
 static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
-- 
cgit v1.2.2


From a63eaf34ae60bdb067a354cc8def2e8f4a01f5f4 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 22 May 2009 14:17:31 +1000
Subject: perf_counter: Dynamically allocate tasks' perf_counter_context struct

This replaces the struct perf_counter_context in the task_struct with
a pointer to a dynamically allocated perf_counter_context struct.  The
main reason for doing this is to allow us to transfer a
perf_counter_context from one task to another when we do lazy PMU
switching in a later patch.

This has a few side-benefits: the task_struct becomes a little smaller,
we save some memory because only tasks that have perf_counters attached
get a perf_counter_context allocated for them, and we can remove the
inclusion of <linux/perf_counter.h> in sched.h, meaning that we don't
end up recompiling nearly everything whenever perf_counter.h changes.

The perf_counter_context structures are reference-counted and freed
when the last reference is dropped.  A context can have references
from its task and the counters on its task.  Counters can outlive the
task so it is possible that a context will be freed well after its
task has exited.

Contexts are allocated on fork if the parent had a context, or
otherwise the first time that a per-task counter is created on a task.
In the latter case, we set the context pointer in the task struct
locklessly using an atomic compare-and-exchange operation in case we
raced with some other task in creating a context for the subject task.

This also removes the task pointer from the perf_counter struct.  The
task pointer was not used anywhere and would make it harder to move a
context from one task to another.  Anything that needed to know which
task a counter was attached to was already using counter->ctx->task.

The __perf_counter_init_context function moves up in perf_counter.c
so that it can be called from find_get_context, and now initializes
the refcount, but is otherwise unchanged.

We were potentially calling list_del_counter twice: once from
__perf_counter_exit_task when the task exits and once from
__perf_counter_remove_from_context when the counter's fd gets closed.
This adds a check in list_del_counter so it doesn't do anything if
the counter has already been removed from the lists.

Since perf_counter_task_sched_in doesn't do anything if the task doesn't
have a context, and leaves cpuctx->task_ctx = NULL, this adds code to
__perf_install_in_context to set cpuctx->task_ctx if necessary, i.e. in
the case where the current task adds the first counter to itself and
thus creates a context for itself.

This also adds similar code to __perf_counter_enable to handle a
similar situation which can arise when the counters have been disabled
using prctl; that also leaves cpuctx->task_ctx = NULL.

[ Impact: refactor counter context management to prepare for new feature ]

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <18966.10075.781053.231153@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ff59d1231519..9714d450f417 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -71,7 +71,6 @@ struct sched_param {
 #include <linux/path.h>
 #include <linux/compiler.h>
 #include <linux/completion.h>
-#include <linux/perf_counter.h>
 #include <linux/pid.h>
 #include <linux/percpu.h>
 #include <linux/topology.h>
@@ -99,6 +98,7 @@ struct robust_list_head;
 struct bio;
 struct bts_tracer;
 struct fs_struct;
+struct perf_counter_context;
 
 /*
  * List of flags we want to share for kernel threads,
@@ -1387,7 +1387,9 @@ struct task_struct {
 	struct list_head pi_state_list;
 	struct futex_pi_state *pi_state_cache;
 #endif
-	struct perf_counter_context perf_counter_ctx;
+#ifdef CONFIG_PERF_COUNTERS
+	struct perf_counter_context *perf_counter_ctxp;
+#endif
 #ifdef CONFIG_NUMA
 	struct mempolicy *mempolicy;
 	short il_next;
-- 
cgit v1.2.2


From 082ff5a2767a0679ee543f14883adbafb631ffbe Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Sat, 23 May 2009 18:29:00 +0200
Subject: perf_counter: Change prctl() behaviour

Instead of en/dis-abling all counters acting on a particular
task, en/dis-able all counters we created.

[ v2: fix crash on first counter enable ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090523163012.916937244@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9714d450f417..bc9326dcdde1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1389,6 +1389,8 @@ struct task_struct {
 #endif
 #ifdef CONFIG_PERF_COUNTERS
 	struct perf_counter_context *perf_counter_ctxp;
+	struct mutex perf_counter_mutex;
+	struct list_head perf_counter_list;
 #endif
 #ifdef CONFIG_NUMA
 	struct mempolicy *mempolicy;
-- 
cgit v1.2.2


From 3959214f971417f4162926ac52ad4cd042958caa Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Tue, 24 Mar 2009 15:43:30 +0100
Subject: sched: delayed cleanup of user_struct

During bootup performance tracing we see repeated occurrences of
/sys/kernel/uid/* events for the same uid, leading to, in this
case, rather pointless userspace processing for the same uid
over and over.

This is usually caused by tools which change their uid to "nobody",
to run without privileges to read data supplied by untrusted users.

This change delays the execution of the (already existing) scheduled
work, to clean up the uid after one second, so the allocated and announced
uid can possibly be re-used by another process.

This is the current behavior, where almost every invocation of a
binary, which changes the uid, creates two events:
  $ read START < /sys/kernel/uevent_seqnum; \
  for i in `seq 100`; do su --shell=/bin/true bin; done; \
  read END < /sys/kernel/uevent_seqnum; \
  echo $(($END - $START))
  178

With the delayed cleanup, we get only two events, and userspace finishes
a bit faster too:
  $ read START < /sys/kernel/uevent_seqnum; \
  for i in `seq 100`; do su --shell=/bin/true bin; done; \
  read END < /sys/kernel/uevent_seqnum; \
  echo $(($END - $START))
  1

Acked-by: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c900aa530070..7531b1c28201 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -674,7 +674,7 @@ struct user_struct {
 	struct task_group *tg;
 #ifdef CONFIG_SYSFS
 	struct kobject kobj;
-	struct work_struct work;
+	struct delayed_work work;
 #endif
 #endif
 
-- 
cgit v1.2.2


From 58568d2a8215cb6f55caf2332017d7bdff954e1c Mon Sep 17 00:00:00 2001
From: Miao Xie <miaox@cn.fujitsu.com>
Date: Tue, 16 Jun 2009 15:31:49 -0700
Subject: cpuset,mm: update tasks' mems_allowed in time

Fix allocating page cache/slab object on the unallowed node when memory
spread is set by updating tasks' mems_allowed after its cpuset's mems is
changed.

In order to update tasks' mems_allowed in time, we must modify the code of
memory policy, because the memory policy was originally applied in the
process's own context.  After applying this patch, one task directly
manipulates another's mems_allowed, and we use alloc_lock in the
task_struct to protect mems_allowed and memory policy of the task.

But in the fast path, we don't use a lock to protect them, because adding a
lock may lead to a performance regression.  But if we don't add a lock, the
task might see no nodes when changing cpuset's mems_allowed to some
non-overlapping set.  In order to avoid it, we set all new allowed nodes,
then clear newly disallowed ones.

[lee.schermerhorn@hp.com:
  The rework of mpol_new() to extract the adjusting of the node mask to
  apply cpuset and mpol flags "context" breaks set_mempolicy() and mbind()
  with MPOL_PREFERRED and a NULL nodemask--i.e., explicit local
  allocation.  Fix this by adding the check for MPOL_PREFERRED and empty
  node mask to mpol_new_mempolicy().

  Remove the now unneeded 'nodes = NULL' from mpol_new().

  Note that mpol_new_mempolicy() is always called with a non-NULL
  'nodes' parameter now that it has been removed from mpol_new().
  Therefore, we don't need to test nodes for NULL before testing it for
  'empty'.  However, just to be extra paranoid, add a VM_BUG_ON() to
  verify this assumption.]
[lee.schermerhorn@hp.com:

  I don't think the function name 'mpol_new_mempolicy' is descriptive
  enough to differentiate it from mpol_new().

  This function applies cpuset set context, usually constraining nodes
  to those allowed by the cpuset.  However, when the 'RELATIVE_NODES' flag
  is set, it also translates the nodes.  So I settled on
  'mpol_set_nodemask()', because the comment block for mpol_new() mentions
  that we need to call this function to "set nodes".

  Some additional minor line length, whitespace and typo cleanup.]
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Paul Menage <menage@google.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c900aa530070..1048bf50540a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1318,7 +1318,8 @@ struct task_struct {
 /* Thread group tracking */
    	u32 parent_exec_id;
    	u32 self_exec_id;
-/* Protection of (de-)allocation: mm, files, fs, tty, keyrings */
+/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
+ * mempolicy */
 	spinlock_t alloc_lock;
 
 #ifdef CONFIG_GENERIC_HARDIRQS
@@ -1386,8 +1387,7 @@ struct task_struct {
 	cputime_t acct_timexpd;	/* stime + utime since last update */
 #endif
 #ifdef CONFIG_CPUSETS
-	nodemask_t mems_allowed;
-	int cpuset_mems_generation;
+	nodemask_t mems_allowed;	/* Protected by alloc_lock */
 	int cpuset_mem_spread_rotor;
 #endif
 #ifdef CONFIG_CGROUPS
@@ -1410,7 +1410,7 @@ struct task_struct {
 	struct list_head perf_counter_list;
 #endif
 #ifdef CONFIG_NUMA
-	struct mempolicy *mempolicy;
+	struct mempolicy *mempolicy;	/* Protected by alloc_lock */
 	short il_next;
 #endif
 	atomic_t fs_excl;	/* holding fs exclusive resources */
-- 
cgit v1.2.2


From 2ff05b2b4eac2e63d345fc731ea151a060247f53 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Tue, 16 Jun 2009 15:32:56 -0700
Subject: oom: move oom_adj value from task_struct to mm_struct

The per-task oom_adj value is a characteristic of its mm more than the
task itself since it's not possible to oom kill any thread that shares the
mm.  If a task were to be killed while attached to an mm that could not be
freed because another thread was set to OOM_DISABLE, it would have
needlessly been terminated since there is no potential for future memory
freeing.

This patch moves oomkilladj (now more appropriately named oom_adj) from
struct task_struct to struct mm_struct.  This requires task_lock() on a
task to check its oom_adj value to protect against exec, but it's already
necessary to take the lock when dereferencing the mm to find the total VM
size for the badness heuristic.

This fixes a livelock if the oom killer chooses a task and another thread
sharing the same memory has an oom_adj value of OOM_DISABLE.  This occurs
because oom_kill_task() repeatedly returns 1 and refuses to kill the
chosen task while select_bad_process() will repeatedly choose the same
task during the next retry.

Taking task_lock() in select_bad_process() to check for OOM_DISABLE and in
oom_kill_task() to check for threads sharing the same memory will be
removed in the next patch in this series where it will no longer be
necessary.

Writing to /proc/pid/oom_adj for a kthread will now return -EINVAL since
these threads are immune from oom killing already.  They simply report an
oom_adj value of OOM_DISABLE.

Cc: Nick Piggin <npiggin@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1048bf50540a..1bc6fae0c135 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1178,7 +1178,6 @@ struct task_struct {
 	 * a short time
 	 */
 	unsigned char fpu_counter;
-	s8 oomkilladj; /* OOM kill score adjustment (bit shift). */
 #ifdef CONFIG_BLK_DEV_IO_TRACE
 	unsigned int btrace_seq;
 #endif
-- 
cgit v1.2.2


From 20ebcdda78a282d1d5266887ddf8a2d670182576 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 17 Jun 2009 16:27:16 -0700
Subject: memcg: remove unneeded forward declaration from sched.h

This forward declaration seems pointless.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 02042e7f2196..d0342101756a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -92,7 +92,6 @@ struct sched_param {
 
 #include <asm/processor.h>
 
-struct mem_cgroup;
 struct exec_domain;
 struct futex_pi_state;
 struct robust_list_head;
-- 
cgit v1.2.2


From 17f98dcf6010a1cfd25d179fd0ce77d3dc2685c3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 17 Jun 2009 16:27:51 -0700
Subject: pids: clean up find_task_by_pid variants

find_task_by_pid_type_ns is only used to implement find_task_by_vpid and
find_task_by_pid_ns, but both of them pass PIDTYPE_PID as first argument.
So just fold find_task_by_pid_type_ns into find_task_by_pid_ns and use
find_task_by_pid_ns to implement find_task_by_vpid.

While we're at it also remove the exports for find_task_by_pid_ns and
find_task_by_vpid - we don't have any modular callers left as the only
modular caller of the old pre pid namespace find_task_by_pid (gfs2) was
switched to pid_task which operates on a struct pid pointer instead of a
pid_t.  Given the confusion about pid_t values vs namespace that's
generally the better option anyway and I think we're better off restricting
modules to do it that way.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d0342101756a..4d0754269884 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1878,9 +1878,6 @@ extern struct pid_namespace init_pid_ns;
 /*
  * find a task by one of its numerical ids
  *
- * find_task_by_pid_type_ns():
- *      it is the most generic call - it finds a task by all id,
- *      type and namespace specified
  * find_task_by_pid_ns():
  *      finds a task by its pid in the specified namespace
  * find_task_by_vpid():
@@ -1889,9 +1886,6 @@ extern struct pid_namespace init_pid_ns;
  * see also find_vpid() etc in include/linux/pid.h
  */
 
-extern struct task_struct *find_task_by_pid_type_ns(int type, int pid,
-		struct pid_namespace *ns);
-
 extern struct task_struct *find_task_by_vpid(pid_t nr);
 extern struct task_struct *find_task_by_pid_ns(pid_t nr,
 		struct pid_namespace *ns);
-- 
cgit v1.2.2