From ee68cea2c26b7a8222f9020f54d22c6067011e8b Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 15 Feb 2006 01:34:23 -0800
Subject: [NETFILTER]: Fix xfrm lookup after SNAT

To find out if a packet needs to be handled by IPsec after SNAT, packets
are currently rerouted in POST_ROUTING and a new xfrm lookup is done. This
breaks SNAT of non-unicast packets to non-local addresses because the
packet is routed as incoming packet and no neighbour entry is bound to the
dst_entry. In general, it seems to be a bad idea to replace the dst_entry
after the packet was already sent to the output routine because its state
might not match what's expected.

This patch changes the xfrm lookup in POST_ROUTING to re-use the original
dst_entry without routing the packet again. This means no policy routing
can be used for transport mode transforms (which keep the original route)
when packets are SNATed to match the policy, but it looks like the best
we can do for now.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv4.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h
index fdc4a9527343..43c09d790b83 100644
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -79,7 +79,7 @@ enum nf_ip_hook_priorities {
 
 #ifdef __KERNEL__
 extern int ip_route_me_harder(struct sk_buff **pskb);
-
+extern int ip_xfrm_me_harder(struct sk_buff **pskb);
 #endif /*__KERNEL__*/
 
 #endif /*__LINUX_IP_NETFILTER_H*/
-- 
cgit v1.2.2


From 5ecfbae093f0c37311e89b29bfc0c9d586eace87 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Wed, 15 Feb 2006 22:50:10 +0300
Subject: [PATCH] fix zap_thread's ptrace related problems

1. The tracee can go from ptrace_stop() to do_signal_stop()
   after __ptrace_unlink(p).

2. It is unsafe to __ptrace_unlink(p) while p->parent may wait
   for tasklist_lock in ptrace_detach().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/ptrace.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 9d5cd106b344..0d36750fc0f1 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -84,6 +84,7 @@ extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __us
 extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len);
 extern int ptrace_attach(struct task_struct *tsk);
 extern int ptrace_detach(struct task_struct *, unsigned int);
+extern void __ptrace_detach(struct task_struct *, unsigned int);
 extern void ptrace_disable(struct task_struct *);
 extern int ptrace_check_attach(struct task_struct *task, int kill);
 extern int ptrace_request(struct task_struct *child, long request, long addr, long data);
-- 
cgit v1.2.2


From 48d5cad87c3a4998d0bda16ccfb5c60dfe4de5fb Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 15 Feb 2006 15:10:22 -0800
Subject: [XFRM]: Fix SNAT-related crash in xfrm4_output_finish

When a packet matching an IPsec policy is SNATed so it doesn't match any
policy anymore it looses its xfrm bundle, which makes xfrm4_output_finish
crash because of a NULL pointer dereference.

This patch directs these packets to the original output path instead. Since
the packets have already passed the POST_ROUTING hook, but need to start at
the beginning of the original output path which includes another
POST_ROUTING invocation, a flag is added to the IPCB to indicate that the
packet was rerouted and doesn't need to pass the POST_ROUTING hook again.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter.h | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 4cf6088625c1..3ca3d9ee78a9 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -184,8 +184,11 @@ static inline int nf_hook_thresh(int pf, unsigned int hook,
 				 struct sk_buff **pskb,
 				 struct net_device *indev,
 				 struct net_device *outdev,
-				 int (*okfn)(struct sk_buff *), int thresh)
+				 int (*okfn)(struct sk_buff *), int thresh,
+				 int cond)
 {
+	if (!cond)
+		return 1;
 #ifndef CONFIG_NETFILTER_DEBUG
 	if (list_empty(&nf_hooks[pf][hook]))
 		return 1;
@@ -197,7 +200,7 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb,
 			  struct net_device *indev, struct net_device *outdev,
 			  int (*okfn)(struct sk_buff *))
 {
-	return nf_hook_thresh(pf, hook, pskb, indev, outdev, okfn, INT_MIN);
+	return nf_hook_thresh(pf, hook, pskb, indev, outdev, okfn, INT_MIN, 1);
 }
                    
 /* Activate hook; either okfn or kfree_skb called, unless a hook
@@ -224,7 +227,13 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb,
 
 #define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh)	       \
 ({int __ret;								       \
-if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, thresh)) == 1)\
+if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, thresh, 1)) == 1)\
+	__ret = (okfn)(skb);						       \
+__ret;})
+
+#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond)		       \
+({int __ret;								       \
+if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, INT_MIN, cond)) == 1)\
 	__ret = (okfn)(skb);						       \
 __ret;})
 
@@ -295,11 +304,13 @@ extern struct proc_dir_entry *proc_net_netfilter;
 
 #else /* !CONFIG_NETFILTER */
 #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb)
+#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb)
 static inline int nf_hook_thresh(int pf, unsigned int hook,
 				 struct sk_buff **pskb,
 				 struct net_device *indev,
 				 struct net_device *outdev,
-				 int (*okfn)(struct sk_buff *), int thresh)
+				 int (*okfn)(struct sk_buff *), int thresh,
+				 int cond)
 {
 	return okfn(*pskb);
 }
-- 
cgit v1.2.2


From 9c92d3486434e7310cb288587953e2dae4a79701 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 15 Feb 2006 15:18:19 -0800
Subject: [NETFILTER]: Don't invoke okfn in CONFIG_NETFILTER=n variant of
 nf_hook()

nf_hook() is supposed to call the netfilter hook and return control of the
packet back to the caller in case it may pass, the okfn is only used for
queueing.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 3ca3d9ee78a9..468896939843 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -318,7 +318,7 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb,
 			  struct net_device *indev, struct net_device *outdev,
 			  int (*okfn)(struct sk_buff *))
 {
-	return okfn(*pskb);
+	return 1;
 }
 static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
 struct flowi;
-- 
cgit v1.2.2


From b2ee9dbfad14ba8e34a589d552ddc67300a26bec Mon Sep 17 00:00:00 2001
From: Roman Zippel <zippel@linux-m68k.org>
Date: Wed, 15 Feb 2006 15:17:40 -0800
Subject: [PATCH] hrtimer: fix multiple macro argument expansion

For two macros the arguments were expanded twice, change them to inline
functions to avoid it.

Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/ktime.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 6aca67a569a2..f3dec45ef874 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -96,10 +96,16 @@ static inline ktime_t ktime_set(const long secs, const unsigned long nsecs)
 		({ (ktime_t){ .tv64 = (kt).tv64 + (nsval) }; })
 
 /* convert a timespec to ktime_t format: */
-#define timespec_to_ktime(ts)		ktime_set((ts).tv_sec, (ts).tv_nsec)
+static inline ktime_t timespec_to_ktime(struct timespec ts)
+{
+	return ktime_set(ts.tv_sec, ts.tv_nsec);
+}
 
 /* convert a timeval to ktime_t format: */
-#define timeval_to_ktime(tv)		ktime_set((tv).tv_sec, (tv).tv_usec * 1000)
+static inline ktime_t timeval_to_ktime(struct timeval tv)
+{
+	return ktime_set(tv.tv_sec, tv.tv_usec * NSEC_PER_USEC);
+}
 
 /* Map the ktime_t to timespec conversion to ns_to_timespec function */
 #define ktime_to_timespec(kt)		ns_to_timespec((kt).tv64)
-- 
cgit v1.2.2


From a62eaf151d9cb478d127cfbc2e93c498869785b0 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 16 Feb 2006 23:41:58 +0100
Subject: [PATCH] x86_64: Add boot option to disable randomized mappings and
 cleanup

AMD SimNow!'s JIT doesn't like them at all in the guest. For distribution
installation it's easiest if it's a boot time option.

Also I moved the variable to a more appropiate place and make
it independent from sysctl

And marked __read_mostly which it is.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/kernel.h | 6 ------
 include/linux/mm.h     | 2 ++
 2 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index b49affa0ac5a..3b507bf05d09 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -326,12 +326,6 @@ struct sysinfo {
 /* Force a compilation error if condition is true */
 #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
 
-#ifdef CONFIG_SYSCTL
-extern int randomize_va_space;
-#else
-#define randomize_va_space 1
-#endif
-
 /* Trap pasters of __FUNCTION__ at compile-time */
 #define __FUNCTION__ (__func__)
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 75e9f0724997..26e1663a5cbe 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1051,5 +1051,7 @@ int shrink_slab(unsigned long scanned, gfp_t gfp_mask,
 void drop_pagecache(void);
 void drop_slab(void);
 
+extern int randomize_va_space;
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
-- 
cgit v1.2.2


From 726c14bf499e91e7ede4f1728830aba05c675061 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 17 Feb 2006 10:30:23 +1100
Subject: [PATCH] Provide an interface for getting the current tick length

This provides an interface for arch code to find out how many
nanoseconds are going to be added on to xtime by the next call to
do_timer.  The value returned is a fixed-point number in 52.12 format
in nanoseconds.  The reason for this format is that it gives the
full precision that the timekeeping code is using internally.

The motivation for this is to fix a problem that has arisen on 32-bit
powerpc in that the value returned by do_gettimeofday drifts apart
from xtime if NTP is being used.  PowerPC is now using a lockless
do_gettimeofday based on reading the timebase register and performing
some simple arithmetic.  (This method of getting the time is also
exported to userspace via the VDSO.)  However, the factor and offset
it uses were calculated based on the nominal tick length and weren't
being adjusted when NTP varied the tick length.

Note that 64-bit powerpc has had the lockless do_gettimeofday for a
long time now.  It also had an extremely hairy routine that got called
from the 32-bit compat routine for adjtimex, which adjusted the
factor and offset according to what it thought the timekeeping code
was going to do.  Not only was this only called if a 32-bit task did
adjtimex (i.e. not if a 64-bit task did adjtimex), it was also
duplicating computations from kernel/timer.c and it wasn't clear that
it was (still) correct.

The simple solution is to ask the timekeeping code how long the
current jiffy will be on each timer interrupt, after calling
do_timer.  If this jiffy will be a different length from the last one,
we then need to compute new values for the factor and offset used in
the lockless do_gettimeofday.  In this way we can keep xtime and
do_gettimeofday in sync, even when NTP is varying the tick length.

Note that when adjtimex varies the tick length, it almost always
introduces the variation from the next tick on.  The only case I could
see where adjtimex would vary the length of the current tick is when
an old-style adjtime adjustment is being cancelled.  (It's not clear
to me why the adjustment has to be cancelled immediately rather than
from the next tick on.)  Thus I don't see any real need for a hook in
adjtimex; the rare case of an old-style adjustment being cancelled can
be fixed up at the next tick.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: john stultz <johnstul@us.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/timex.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/timex.h b/include/linux/timex.h
index 04a4a8cb4ed3..b7ca1204e42a 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -345,6 +345,9 @@ time_interpolator_reset(void)
 
 #endif /* !CONFIG_TIME_INTERPOLATION */
 
+/* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */
+extern u64 current_tick_length(void);
+
 #endif /* KERNEL */
 
 #endif /* LINUX_TIMEX_H */
-- 
cgit v1.2.2


From cc1887f3d8ae8ea61efa1a75af8ec0467b9dd546 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Mon, 20 Feb 2006 23:48:38 +0900
Subject: [PATCH] libata: fix qc->n_elem == 0 case handling in ata_qc_next_sg

This patch makes ata_for_each_sg() start with pad_sgent when
qc->n_elem is zero.  Previously, ata_for_each_sg() unconditionally
started with qc->__sg, handling the first sg to fill_sg() routines
even when the entry was invalid.  And while at it, unwind ?: in
ata_qc_next_sg() into if statement.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
---
 include/linux/libata.h | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 9e5db2949c58..c91be5e64ede 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -556,6 +556,16 @@ ata_sg_is_last(struct scatterlist *sg, struct ata_queued_cmd *qc)
 	return 0;
 }
 
+static inline struct scatterlist *
+ata_qc_first_sg(struct ata_queued_cmd *qc)
+{
+	if (qc->n_elem)
+		return qc->__sg;
+	if (qc->pad_len)
+		return &qc->pad_sgent;
+	return NULL;
+}
+
 static inline struct scatterlist *
 ata_qc_next_sg(struct scatterlist *sg, struct ata_queued_cmd *qc)
 {
@@ -563,11 +573,13 @@ ata_qc_next_sg(struct scatterlist *sg, struct ata_queued_cmd *qc)
 		return NULL;
 	if (++sg - qc->__sg < qc->n_elem)
 		return sg;
-	return qc->pad_len ? &qc->pad_sgent : NULL;
+	if (qc->pad_len)
+		return &qc->pad_sgent;
+	return NULL;
 }
 
 #define ata_for_each_sg(sg, qc) \
-	for (sg = qc->__sg; sg; sg = ata_qc_next_sg(sg, qc))
+	for (sg = ata_qc_first_sg(qc); sg; sg = ata_qc_next_sg(sg, qc))
 
 static inline unsigned int ata_tag_valid(unsigned int tag)
 {
-- 
cgit v1.2.2


From 9b0f8b040acd8dfd23860754c0d09ff4f44e2cbc Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@engr.sgi.com>
Date: Mon, 20 Feb 2006 18:27:52 -0800
Subject: [PATCH] Terminate process that fails on a constrained allocation

Some allocations are restricted to a limited set of nodes (due to memory
policies or cpuset constraints).  If the page allocator is not able to find
enough memory then that does not mean that overall system memory is low.

In particular going postal and more or less randomly shooting at processes
is not likely going to help the situation but may just lead to suicide (the
whole system coming down).

It is better to signal to the process that no memory exists given the
constraints that the process (or the configuration of the process) has
placed on the allocation behavior.  The process may be killed but then the
sysadmin or developer can investigate the situation.  The solution is
similar to what we do when running out of hugepages.

This patch adds a check before we kill processes.  At that point
performance considerations do not matter much so we just scan the zonelist
and reconstruct a list of nodes.  If the list of nodes does not contain all
online nodes then this is a constrained allocation and we should kill the
current process.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/swap.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index f3e17d5963c3..d572b19afb7d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -147,7 +147,7 @@ struct swap_list_t {
 #define vm_swap_full() (nr_swap_pages*2 < total_swap_pages)
 
 /* linux/mm/oom_kill.c */
-extern void out_of_memory(gfp_t gfp_mask, int order);
+extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order);
 
 /* linux/mm/memory.c */
 extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *);
-- 
cgit v1.2.2


From c255d844dd73616f23e4b4733edcc2e5fa4042b2 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Mon, 20 Feb 2006 18:27:58 -0800
Subject: [PATCH] suspend-to-ram: allow video options to be set at runtime

Currently, acpi video options can only be set on kernel command line.  That's
little inflexible; I'd like userland s2ram application that just works, and
modifying kernel command line according to whitelist is not fun.  It is better
to just allow s2ram application to set video options just before suspend
(according to the whitelist).

This implements sysctl to allow setting suspend video options without reboot.

(akpm: Documentation updates for this new sysctl are pending..)

Signed-off-by: Pavel Machek <pavel@suse.cz>
Cc: "Brown, Len" <len.brown@intel.com>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/acpi.h   | 3 ++-
 include/linux/sysctl.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 84d3d9f034ce..d3bc25e6d27d 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -427,7 +427,8 @@ extern int acpi_mp_config;
 extern struct acpi_table_mcfg_config *pci_mmcfg_config;
 extern int pci_mmcfg_config_num;
 
-extern int sbf_port ;
+extern int sbf_port;
+extern unsigned long acpi_video_flags;
 
 #else	/* !CONFIG_ACPI */
 
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 32a4139c4ad8..0e92bf7ec28e 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -146,6 +146,7 @@ enum
 	KERN_RANDOMIZE=68, /* int: randomize virtual address space */
 	KERN_SETUID_DUMPABLE=69, /* int: behaviour of dumps for setuid core */
 	KERN_SPIN_RETRY=70,	/* int: number of spinlock retries */
+	KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */
 };
 
 
-- 
cgit v1.2.2


From 7a9166e3b037296366cea6f3c97f705d33e209e6 Mon Sep 17 00:00:00 2001
From: Luke Yang <luke.adi@gmail.com>
Date: Mon, 20 Feb 2006 18:28:07 -0800
Subject: [PATCH] Fix undefined symbols for nommu architecture

Signed-off-by: Luke Yang <luke.adi@gmail.com>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 26e1663a5cbe..498ff8778fb6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1051,7 +1051,11 @@ int shrink_slab(unsigned long scanned, gfp_t gfp_mask,
 void drop_pagecache(void);
 void drop_slab(void);
 
+#ifndef CONFIG_MMU
+#define randomize_va_space 0
+#else
 extern int randomize_va_space;
+#endif
 
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
-- 
cgit v1.2.2


From 7fd105e758c8d746d57ab7e77f100e096bf153c8 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Mon, 20 Feb 2006 18:28:08 -0800
Subject: [PATCH] Fix compile for CONFIG_SYSVIPC=n or CONFIG_SYSCTL=n

The compat syscalls are added to sys_ni.c since they are not defined if the
above CONFIG options are off.  Also, nfs would not build with CONFIG_SYSCTL
off.

Noticed by Arthur Othieno.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/nfs_fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 547d649b274e..b4dc6e2e10c9 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -398,7 +398,7 @@ extern struct inode_operations nfs_symlink_inode_operations;
 extern int nfs_register_sysctl(void);
 extern void nfs_unregister_sysctl(void);
 #else
-#define nfs_register_sysctl() do { } while(0)
+#define nfs_register_sysctl() 0
 #define nfs_unregister_sysctl() do { } while(0)
 #endif
 
-- 
cgit v1.2.2


From 5bd546aa78b5d74f3162815e41940f862215d9e3 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Fri, 17 Feb 2006 20:23:29 +0000
Subject: [MMC] Fix mmc_cmd_type() mask

It's MMC_CMD_MASK not MMC_CMD_TYPE.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/mmc/mmc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index f38872abc126..bdc556d88498 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -49,7 +49,7 @@ struct mmc_command {
 /*
  * These are the command types.
  */
-#define mmc_cmd_type(cmd)	((cmd)->flags & MMC_CMD_TYPE)
+#define mmc_cmd_type(cmd)	((cmd)->flags & MMC_CMD_MASK)
 
 	unsigned int		retries;	/* max number of retries */
 	unsigned int		error;		/* command error */
-- 
cgit v1.2.2


From fa675765afed59bb89adba3369094ebd428b930b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Wed, 22 Feb 2006 09:39:02 -0800
Subject: Revert mount/umount uevent removal

This change reverts the 033b96fd30db52a710d97b06f87d16fc59fee0f1 commit
from Kay Sievers that removed the mount/umount uevents from the kernel.
Some older versions of HAL still depend on these events to detect when a
new device has been mounted.  These events are not correctly emitted,
and are broken by design, and so, should not be relied upon by any
future program.  Instead, the /proc/mounts file should be polled to
properly detect this kind of event.

A feature-removal-schedule.txt entry has been added, noting when this
interface will be removed from the kernel.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/kobject.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 2a8d8da70961..c374b5fa8d3b 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -41,8 +41,10 @@ enum kobject_action {
 	KOBJ_ADD	= (__force kobject_action_t) 0x01,	/* exclusive to core */
 	KOBJ_REMOVE	= (__force kobject_action_t) 0x02,	/* exclusive to core */
 	KOBJ_CHANGE	= (__force kobject_action_t) 0x03,	/* device state change */
-	KOBJ_OFFLINE	= (__force kobject_action_t) 0x04,	/* device offline */
-	KOBJ_ONLINE	= (__force kobject_action_t) 0x05,	/* device online */
+	KOBJ_MOUNT	= (__force kobject_action_t) 0x04,	/* mount event for block devices (broken) */
+	KOBJ_UMOUNT	= (__force kobject_action_t) 0x05,	/* umount event for block devices (broken) */
+	KOBJ_OFFLINE	= (__force kobject_action_t) 0x06,	/* device offline */
+	KOBJ_ONLINE	= (__force kobject_action_t) 0x07,	/* device online */
 };
 
 struct kobject {
-- 
cgit v1.2.2


From 85edae14e4ee5e68cf037e9e4bca7498ea16874d Mon Sep 17 00:00:00 2001
From: Michal Janusz Miroslaw <M.Miroslaw@elka.pw.edu.pl>
Date: Thu, 23 Feb 2006 09:49:35 +0000
Subject: [SERIAL] Trivial comment fix: include/linux/serial_reg.h

Trivial comment fix for include/linux/serial_reg.h

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/serial_reg.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/serial_reg.h b/include/linux/serial_reg.h
index 6a2bb955844b..3c8a6aa77415 100644
--- a/include/linux/serial_reg.h
+++ b/include/linux/serial_reg.h
@@ -247,10 +247,10 @@
 #define UART_CTR	0xFF
 
 /*
- * The 16C950 Additional Control Reigster
+ * The 16C950 Additional Control Register
  */
 #define UART_ACR_RXDIS	0x01	/* Receiver disable */
-#define UART_ACR_TXDIS	0x02	/* Receiver disable */
+#define UART_ACR_TXDIS	0x02	/* Transmitter disable */
 #define UART_ACR_DSRFC	0x04	/* DSR Flow Control */
 #define UART_ACR_TLENB	0x20	/* 950 trigger levels enable */
 #define UART_ACR_ICRRD	0x40	/* ICR Read enable */
-- 
cgit v1.2.2


From c04030e16dbea2f7581f82cc6688695927f6ac5b Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Fri, 24 Feb 2006 13:04:21 -0800
Subject: [PATCH] flags parameter for linkat

I'm currently at the POSIX meeting and one thing covered was the
incompatibility of Linux's link() with the POSIX definition.  The name.
Linux does not follow symlinks, POSIX requires it does.

Even if somebody thinks this is a good default behavior we cannot change this
because it would break the ABI.  But the fact remains that some application
might want this behavior.

We have one chance to help implementing this without breaking the behavior.
 For this we could use the new linkat interface which would need a new
flags parameter.  If the new parameter is AT_SYMLINK_FOLLOW the new
behavior could be invoked.

I do not want to introduce such a patch now.  But we could add the
parameter now, just don't use it.  The patch below would do this.  Can we
get this late patch applied before the release more or less fixes the
syscall API?

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/syscalls.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index d73501ba7e44..b9ea44ac0ddb 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -543,7 +543,7 @@ asmlinkage long sys_unlinkat(int dfd, const char __user * pathname, int flag);
 asmlinkage long sys_symlinkat(const char __user * oldname,
 			      int newdfd, const char __user * newname);
 asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
-			   int newdfd, const char __user *newname);
+			   int newdfd, const char __user *newname, int flags);
 asmlinkage long sys_renameat(int olddfd, const char __user * oldname,
 			     int newdfd, const char __user * newname);
 asmlinkage long sys_futimesat(int dfd, char __user *filename,
-- 
cgit v1.2.2


From bafac2a512bf4fd2ce7520f3976ce8aab4435f74 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 27 Feb 2006 13:04:17 -0800
Subject: [NETFILTER]: Restore {ipt,ip6t,ebt}_LOG compatibility

The nfnetlink_log infrastructure changes broke compatiblity of the LOG
targets. They currently use whatever log backend was registered first,
which means that if ipt_ULOG was loaded first, no messages will be printed
to the ring buffer anymore.

Restore compatiblity by using the old log functions by default and only use
the nf_log backend if the user explicitly said so.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_bridge/ebt_log.h | 1 +
 include/linux/netfilter_ipv4/ipt_LOG.h   | 3 ++-
 include/linux/netfilter_ipv6/ip6t_LOG.h  | 3 ++-
 3 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_bridge/ebt_log.h b/include/linux/netfilter_bridge/ebt_log.h
index 358fbc84fb59..96e231ae7554 100644
--- a/include/linux/netfilter_bridge/ebt_log.h
+++ b/include/linux/netfilter_bridge/ebt_log.h
@@ -3,6 +3,7 @@
 
 #define EBT_LOG_IP 0x01 /* if the frame is made by ip, log the ip information */
 #define EBT_LOG_ARP 0x02
+#define EBT_LOG_NFLOG 0x04
 #define EBT_LOG_MASK (EBT_LOG_IP | EBT_LOG_ARP)
 #define EBT_LOG_PREFIX_SIZE 30
 #define EBT_LOG_WATCHER "log"
diff --git a/include/linux/netfilter_ipv4/ipt_LOG.h b/include/linux/netfilter_ipv4/ipt_LOG.h
index 22d16177319b..892f9a33fea8 100644
--- a/include/linux/netfilter_ipv4/ipt_LOG.h
+++ b/include/linux/netfilter_ipv4/ipt_LOG.h
@@ -6,7 +6,8 @@
 #define IPT_LOG_TCPOPT		0x02	/* Log TCP options */
 #define IPT_LOG_IPOPT		0x04	/* Log IP options */
 #define IPT_LOG_UID		0x08	/* Log UID owning local socket */
-#define IPT_LOG_MASK		0x0f
+#define IPT_LOG_NFLOG		0x10	/* Log using nf_log backend */
+#define IPT_LOG_MASK		0x1f
 
 struct ipt_log_info {
 	unsigned char level;
diff --git a/include/linux/netfilter_ipv6/ip6t_LOG.h b/include/linux/netfilter_ipv6/ip6t_LOG.h
index 9008ff5c40ae..060c1a1c6c60 100644
--- a/include/linux/netfilter_ipv6/ip6t_LOG.h
+++ b/include/linux/netfilter_ipv6/ip6t_LOG.h
@@ -6,7 +6,8 @@
 #define IP6T_LOG_TCPOPT		0x02	/* Log TCP options */
 #define IP6T_LOG_IPOPT		0x04	/* Log IP options */
 #define IP6T_LOG_UID		0x08	/* Log UID owning local socket */
-#define IP6T_LOG_MASK		0x0f
+#define IP6T_LOG_NFLOG		0x10	/* Log using nf_log backend */
+#define IP6T_LOG_MASK		0x1f
 
 struct ip6t_log_info {
 	unsigned char level;
-- 
cgit v1.2.2


From d2b176ed878d4d5fcc0bd35656dfd373f3702af9 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jes@sgi.com>
Date: Tue, 28 Feb 2006 09:42:23 -0800
Subject: [IA64] sysctl option to silence unaligned trap warnings

Allow sysadmin to disable all warnings about userland apps
making unaligned accesses by using:
 # echo 1 > /proc/sys/kernel/ignore-unaligned-usertrap
Rather than having to use prctl on a process by process basis.

Default behaivour leaves the warnings enabled.

Signed-off-by: Jes Sorensen <jes@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/linux/sysctl.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 0e92bf7ec28e..bac61db26456 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -147,6 +147,7 @@ enum
 	KERN_SETUID_DUMPABLE=69, /* int: behaviour of dumps for setuid core */
 	KERN_SPIN_RETRY=70,	/* int: number of spinlock retries */
 	KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */
+	KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */
 };
 
 
-- 
cgit v1.2.2


From 0551fbd29e16fccd46e41b7d01bf0f8f39b14212 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 28 Feb 2006 16:59:19 -0800
Subject: [PATCH] Add mm->task_size and fix powerpc vdso

This patch adds mm->task_size to keep track of the task size of a given mm
and uses that to fix the powerpc vdso so that it uses the mm task size to
decide what pages to fault in instead of the current thread flags (which
broke when ptracing).

(akpm: I expect that mm_struct.task_size will become the way in which we
finally sort out the confusion between 32-bit processes and 32-bit mm's.  It
may need tweaks, but at this stage this patch is powerpc-only.)

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b6f51e3a38ec..ff2e09c953b9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -298,8 +298,9 @@ struct mm_struct {
 				unsigned long addr, unsigned long len,
 				unsigned long pgoff, unsigned long flags);
 	void (*unmap_area) (struct mm_struct *mm, unsigned long addr);
-        unsigned long mmap_base;		/* base of mmap area */
-        unsigned long cached_hole_size;         /* if non-zero, the largest hole below free_area_cache */
+	unsigned long mmap_base;		/* base of mmap area */
+	unsigned long task_size;		/* size of task vm space */
+	unsigned long cached_hole_size;         /* if non-zero, the largest hole below free_area_cache */
 	unsigned long free_area_cache;		/* first hole of size cached_hole_size or larger */
 	pgd_t * pgd;
 	atomic_t mm_users;			/* How many users with user space? */
-- 
cgit v1.2.2


From 3af1efe8a301f5b1c813f5f761cb1e10d6175605 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Thu, 2 Mar 2006 13:25:26 -0500
Subject: [PATCH] reiserfs: fix unaligned bitmap usage

The bitmaps associated with generation numbers for directory entries
are declared as an array of ints. On some platforms, this causes alignment
exceptions.

The following patch uses the standard bitmap declaration macros to
declare the bitmaps, fixing the problem.

Originally from Takashi Iwai.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Acked-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/reiserfs_fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 7d51149bd793..dad78cecfd20 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -1052,7 +1052,7 @@ struct reiserfs_dir_entry {
 	int de_entrylen;
 	int de_namelen;
 	char *de_name;
-	char *de_gen_number_bit_string;
+	unsigned long *de_gen_number_bit_string;
 
 	__u32 de_dir_id;
 	__u32 de_objectid;
-- 
cgit v1.2.2


From 1e4b27df55166ce3b276f55bab223fa4ae8c5525 Mon Sep 17 00:00:00 2001
From: Karsten Keil <kkeil@suse.de>
Date: Mon, 6 Mar 2006 15:42:37 -0800
Subject: [PATCH] i4l: add new PCI IDs for HFC-S PCI

Add new PCI IDs for HFC-S PCI based ISDN TA 'Primux II S0' and 'Primux II S0'
from Gerdes AG

Signed-off-by: Martin Bachem <info@colognechip.com>
Signed-off-by: Karsten Keil <kkeil@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/pci_ids.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 82b83da25d77..1709b5009d2e 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1752,6 +1752,8 @@
 #define PCI_DEVICE_ID_CCD_B00B		0xb00b
 #define PCI_DEVICE_ID_CCD_B00C		0xb00c
 #define PCI_DEVICE_ID_CCD_B100		0xb100
+#define PCI_DEVICE_ID_CCD_B700		0xb700
+#define PCI_DEVICE_ID_CCD_B701		0xb701
 
 #define PCI_VENDOR_ID_EXAR		0x13a8
 #define PCI_DEVICE_ID_EXAR_XR17C152	0x0152
-- 
cgit v1.2.2


From 69239749e1ac4f3496906aa4267cb9f61ce52c9c Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 6 Mar 2006 15:42:45 -0800
Subject: [PATCH] fix next_timer_interrupt() for hrtimer

Also from Thomas Gleixner <tglx@linutronix.de>

Function next_timer_interrupt() got broken with a recent patch
6ba1b91213e81aa92b5cf7539f7d2a94ff54947c as sys_nanosleep() was moved to
hrtimer.  This broke things as next_timer_interrupt() did not check hrtimer
tree for next event.

Function next_timer_interrupt() is needed with dyntick (CONFIG_NO_IDLE_HZ,
VST) implementations, as the system can be in idle when next hrtimer event
was supposed to happen.  At least ARM and S390 currently use
next_timer_interrupt().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/hrtimer.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 6361544bb6ae..6401c31d6add 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -116,6 +116,10 @@ extern int hrtimer_try_to_cancel(struct hrtimer *timer);
 extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
 extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp);
 
+#ifdef CONFIG_NO_IDLE_HZ
+extern ktime_t hrtimer_get_next_event(void);
+#endif
+
 static inline int hrtimer_active(const struct hrtimer *timer)
 {
 	return timer->state == HRTIMER_PENDING;
-- 
cgit v1.2.2


From 78679302fe428f4f3dc853a51ee24f306010d874 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Mon, 6 Mar 2006 15:42:49 -0800
Subject: [PATCH] memory-hotplug compile fix

include/linux/memory_hotplug.h:53: warning: 'struct page' declared inside parameter list

(akpm: I tossed in a couple more possibly-needed-sometime struct decls too)

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/memory_hotplug.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 01f03bc06eff..968b1aa3732c 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -6,6 +6,10 @@
 #include <linux/mmzone.h>
 #include <linux/notifier.h>
 
+struct page;
+struct zone;
+struct pglist_data;
+
 #ifdef CONFIG_MEMORY_HOTPLUG
 /*
  * pgdat resizing functions
-- 
cgit v1.2.2


From a615fa83959896f8eac76c235953fb164cd1a9b9 Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Mon, 6 Mar 2006 15:42:50 -0800
Subject: [PATCH] Increase max kmalloc size for very large systems

Systems with extemely large numbers of nodes or cpus need to kmalloc
structures larger than is currently supported.  This patch increases the
maximum supported size for very large systems.

This patch should have no effect on current systems.

(akpm: why not just use alloc_pages() for sysfs_cpus?)

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/kmalloc_sizes.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kmalloc_sizes.h b/include/linux/kmalloc_sizes.h
index d82d4c05c12d..bda23e00ed71 100644
--- a/include/linux/kmalloc_sizes.h
+++ b/include/linux/kmalloc_sizes.h
@@ -19,8 +19,10 @@
 	CACHE(32768)
 	CACHE(65536)
 	CACHE(131072)
-#ifndef CONFIG_MMU
+#if (NR_CPUS > 512) || (MAX_NUMNODES > 256) || !defined(CONFIG_MMU)
 	CACHE(262144)
+#endif
+#ifndef CONFIG_MMU
 	CACHE(524288)
 	CACHE(1048576)
 #ifdef CONFIG_LARGE_ALLOCS
-- 
cgit v1.2.2


From a19cbd4bf258840ade3b6ee9e9256006d0644e09 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@g5.osdl.org>
Date: Wed, 8 Mar 2006 14:03:09 -0800
Subject: Mark the pipe file operations static

They aren't used (nor even really usable) outside of pipe.c anyway

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/fs.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index e059da947007..0cc34b1c42c9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1418,9 +1418,6 @@ extern int is_bad_inode(struct inode *);
 extern struct file_operations read_fifo_fops;
 extern struct file_operations write_fifo_fops;
 extern struct file_operations rdwr_fifo_fops;
-extern struct file_operations read_pipe_fops;
-extern struct file_operations write_pipe_fops;
-extern struct file_operations rdwr_pipe_fops;
 
 extern int fs_may_remount_ro(struct super_block *);
 
-- 
cgit v1.2.2


From e2bab3d92486fb781f4d06f56339264ed1492392 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Tue, 7 Mar 2006 21:55:31 -0800
Subject: [PATCH] percpu_counter_sum()

Implement percpu_counter_sum().  This is a more accurate but slower version of
percpu_counter_read_positive().

We need this for Alex's speedup-ext3_statfs patch and for the nr_file
accounting fix.  Otherwise these things would be too inaccurate on large CPU
counts.

Cc: Ravikiran G Thirumalai <kiran@scalex86.org>
Cc: Alex Tomas <alex@clusterfs.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/percpu_counter.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index bd6708e2c027..682525511c9e 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -39,6 +39,7 @@ static inline void percpu_counter_destroy(struct percpu_counter *fbc)
 }
 
 void percpu_counter_mod(struct percpu_counter *fbc, long amount);
+long percpu_counter_sum(struct percpu_counter *fbc);
 
 static inline long percpu_counter_read(struct percpu_counter *fbc)
 {
@@ -92,6 +93,11 @@ static inline long percpu_counter_read_positive(struct percpu_counter *fbc)
 	return fbc->count;
 }
 
+static inline long percpu_counter_sum(struct percpu_counter *fbc)
+{
+	return percpu_counter_read_positive(fbc);
+}
+
 #endif	/* CONFIG_SMP */
 
 static inline void percpu_counter_inc(struct percpu_counter *fbc)
-- 
cgit v1.2.2


From 21a1ea9eb40411d4ee29448c53b9e4c0654d6ceb Mon Sep 17 00:00:00 2001
From: Dipankar Sarma <dipankar@in.ibm.com>
Date: Tue, 7 Mar 2006 21:55:33 -0800
Subject: [PATCH] rcu batch tuning

This patch adds new tunables for RCU queue and finished batches.  There are
two types of controls - number of completed RCU updates invoked in a batch
(blimit) and monitoring for high rate of incoming RCUs on a cpu (qhimark,
qlowmark).

By default, the per-cpu batch limit is set to a small value.  If the input
RCU rate exceeds the high watermark, we do two things - force quiescent
state on all cpus and set the batch limit of the CPU to INTMAX.  Setting
batch limit to INTMAX forces all finished RCUs to be processed in one shot.
 If we have more than INTMAX RCUs queued up, then we have bigger problems
anyway.  Once the incoming queued RCUs fall below the low watermark, the
batch limit is set to the default.

Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/rcupdate.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index b87aefa082e2..c2ec6c77874e 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -98,13 +98,17 @@ struct rcu_data {
 	long  	       	batch;           /* Batch # for current RCU batch */
 	struct rcu_head *nxtlist;
 	struct rcu_head **nxttail;
-	long            count; /* # of queued items */
+	long            qlen; 	 	 /* # of queued callbacks */
 	struct rcu_head *curlist;
 	struct rcu_head **curtail;
 	struct rcu_head *donelist;
 	struct rcu_head **donetail;
+	long		blimit;		 /* Upper limit on a processed batch */
 	int cpu;
 	struct rcu_head barrier;
+#ifdef CONFIG_SMP
+	long		last_rs_qlen;	 /* qlen during the last resched */
+#endif
 };
 
 DECLARE_PER_CPU(struct rcu_data, rcu_data);
-- 
cgit v1.2.2


From 529bf6be5c04f2e869d07bfdb122e9fd98ade714 Mon Sep 17 00:00:00 2001
From: Dipankar Sarma <dipankar@in.ibm.com>
Date: Tue, 7 Mar 2006 21:55:35 -0800
Subject: [PATCH] fix file counting

I have benchmarked this on an x86_64 NUMA system and see no significant
performance difference on kernbench.  Tested on both x86_64 and powerpc.

The way we do file struct accounting is not very suitable for batched
freeing.  For scalability reasons, file accounting was
constructor/destructor based.  This meant that nr_files was decremented
only when the object was removed from the slab cache.  This is susceptible
to slab fragmentation.  With RCU based file structure, consequent batched
freeing and a test program like Serge's, we just speed this up and end up
with a very fragmented slab -

llm22:~ # cat /proc/sys/fs/file-nr
587730  0       758844

At the same time, I see only a 2000+ objects in filp cache.  The following
patch I fixes this problem.

This patch changes the file counting by removing the filp_count_lock.
Instead we use a separate percpu counter, nr_files, for now and all
accesses to it are through get_nr_files() api.  In the sysctl handler for
nr_files, we populate files_stat.nr_files before returning to user.

Counting files as an when they are created and destroyed (as opposed to
inside slab) allows us to correctly count open files with RCU.

Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/file.h | 2 --
 include/linux/fs.h   | 1 +
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/file.h b/include/linux/file.h
index 418b6101b59a..9901b850f2e4 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -60,8 +60,6 @@ extern void put_filp(struct file *);
 extern int get_unused_fd(void);
 extern void FASTCALL(put_unused_fd(unsigned int fd));
 struct kmem_cache;
-extern void filp_ctor(void * objp, struct kmem_cache *cachep, unsigned long cflags);
-extern void filp_dtor(void * objp, struct kmem_cache *cachep, unsigned long dflags);
 
 extern struct file ** alloc_fd_array(int);
 extern void free_fd_array(struct file **, int);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0cc34b1c42c9..51c0c93bdf93 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -35,6 +35,7 @@ struct files_stat_struct {
 	int max_files;		/* tunable */
 };
 extern struct files_stat_struct files_stat;
+extern int get_max_files(void);
 
 struct inodes_stat_t {
 	int nr_inodes;
-- 
cgit v1.2.2


From 0ef675d491bd65028fa838015ebc6ce8abefab6f Mon Sep 17 00:00:00 2001
From: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Date: Thu, 9 Mar 2006 17:33:38 -0800
Subject: [PATCH] mtd: 64 bit fixes

Fix some bugs in mtd/jffs2 on 64bit platform.

The MEMGETBADBLOCK/MEMSETBADBLOCK ioctl are not listed in compat_ioctl.h.

And some variables in jffs2 are declared as uint32_t but used to hold
size_t values.

Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Cc: Thomas Gleixner <tglx@linutronix.de>
Acked-by: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/compat_ioctl.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h
index 8fad50f8e389..ae7dfb790df3 100644
--- a/include/linux/compat_ioctl.h
+++ b/include/linux/compat_ioctl.h
@@ -696,6 +696,8 @@ COMPATIBLE_IOCTL(MEMLOCK)
 COMPATIBLE_IOCTL(MEMUNLOCK)
 COMPATIBLE_IOCTL(MEMGETREGIONCOUNT)
 COMPATIBLE_IOCTL(MEMGETREGIONINFO)
+COMPATIBLE_IOCTL(MEMGETBADBLOCK)
+COMPATIBLE_IOCTL(MEMSETBADBLOCK)
 /* NBD */
 ULONG_IOCTL(NBD_SET_SOCK)
 ULONG_IOCTL(NBD_SET_BLKSIZE)
-- 
cgit v1.2.2


From 8fce4d8e3b9e3cf47cc8afeb6077e22ab795d989 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@engr.sgi.com>
Date: Thu, 9 Mar 2006 17:33:54 -0800
Subject: [PATCH] slab: Node rotor for freeing alien caches and remote per cpu
 pages.

The cache reaper currently tries to free all alien caches and all remote
per cpu pages in each pass of cache_reap.  For a machines with large number
of nodes (such as Altix) this may lead to sporadic delays of around ~10ms.
Interrupts are disabled while reclaiming creating unacceptable delays.

This patch changes that behavior by adding a per cpu reap_node variable.
Instead of attempting to free all caches, we free only one alien cache and
the per cpu pages from one remote node.  That reduces the time spend in
cache_reap.  However, doing so will lengthen the time it takes to
completely drain all remote per cpu pagesets and all alien caches.  The
time needed will grow with the number of nodes in the system.  All caches
are drained when they overflow their respective capacity.  So the drawback
here is only that a bit of memory may be wasted for awhile longer.

Details:

1. Rename drain_remote_pages to drain_node_pages to allow the specification
   of the node to drain of pcp pages.

2. Add additional functions init_reap_node, next_reap_node for NUMA
   that manage a per cpu reap_node counter.

3. Add a reap_alien function that reaps only from the current reap_node.

For us this seems to be a critical issue.  Holdoffs of an average of ~7ms
cause some HPC benchmarks to slow down significantly.  F.e.  NAS parallel
slows down dramatically.  NAS parallel has a 12-16 seconds runtime w/o rotor
compared to 5.8 secs with the rotor patches.  It gets down to 5.05 secs with
the additional interrupt holdoff reductions.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/gfp.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 20f9148e38d9..7851e6b520cf 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -157,9 +157,9 @@ extern void FASTCALL(free_cold_page(struct page *page));
 
 void page_alloc_init(void);
 #ifdef CONFIG_NUMA
-void drain_remote_pages(void);
+void drain_node_pages(int node);
 #else
-static inline void drain_remote_pages(void) { };
+static inline void drain_node_pages(int node) { };
 #endif
 
 #endif /* __LINUX_GFP_H */
-- 
cgit v1.2.2


From 0adb25d2e71ab047423d6fc63d5d184590d0a66f Mon Sep 17 00:00:00 2001
From: Kirill Korotaev <dev@openvz.org>
Date: Sat, 11 Mar 2006 03:27:13 -0800
Subject: [PATCH] ext3: ext3_symlink should use GFP_NOFS allocations inside

This patch fixes illegal __GFP_FS allocation inside ext3 transaction in
ext3_symlink().  Such allocation may re-enter ext3 code from
try_to_free_pages.  But JBD/ext3 code keeps a pointer to current journal
handle in task_struct and, hence, is not reentrable.

This bug led to "Assertion failure in journal_dirty_metadata()" messages.

http://bugzilla.openvz.org/show_bug.cgi?id=115

Signed-off-by: Andrey Savochkin <saw@saw.sw.com.sg>
Signed-off-by: Kirill Korotaev <dev@openvz.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/fs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 51c0c93bdf93..128d0082522c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1664,6 +1664,8 @@ extern int vfs_follow_link(struct nameidata *, const char *);
 extern int page_readlink(struct dentry *, char __user *, int);
 extern void *page_follow_link_light(struct dentry *, struct nameidata *);
 extern void page_put_link(struct dentry *, struct nameidata *, void *);
+extern int __page_symlink(struct inode *inode, const char *symname, int len,
+		gfp_t gfp_mask);
 extern int page_symlink(struct inode *inode, const char *symname, int len);
 extern struct inode_operations page_symlink_inode_operations;
 extern int generic_readlink(struct dentry *, char __user *, int);
-- 
cgit v1.2.2


From 7cd9013be6c22f3ff6f777354f766c8c0b955e17 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 11 Mar 2006 03:27:18 -0800
Subject: [PATCH] remove __put_task_struct_cb export again

The patch '[PATCH] RCU signal handling' [1] added an export for
__put_task_struct_cb, a put_task_struct helper newly introduced in that
patch.  But the put_task_struct couldn't be used modular previously as
__put_task_struct wasn't exported.  There are not callers of it in modular
code, and it shouldn't be exported because we don't want drivers to hold
references to task_structs.

This patch removes the export and folds __put_task_struct into
__put_task_struct_cb as there's no other caller.

[1] http://www2.kernel.org/git/gitweb.cgi?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=e56d090310d7625ecb43a1eeebd479f04affb48b

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Paul E. McKenney <paulmck@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ff2e09c953b9..62e6314382f0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -892,7 +892,6 @@ static inline int pid_alive(struct task_struct *p)
 }
 
 extern void free_task(struct task_struct *tsk);
-extern void __put_task_struct(struct task_struct *tsk);
 #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
 
 extern void __put_task_struct_cb(struct rcu_head *rhp);
-- 
cgit v1.2.2