From ee68cea2c26b7a8222f9020f54d22c6067011e8b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 15 Feb 2006 01:34:23 -0800 Subject: [NETFILTER]: Fix xfrm lookup after SNAT To find out if a packet needs to be handled by IPsec after SNAT, packets are currently rerouted in POST_ROUTING and a new xfrm lookup is done. This breaks SNAT of non-unicast packets to non-local addresses because the packet is routed as incoming packet and no neighbour entry is bound to the dst_entry. In general, it seems to be a bad idea to replace the dst_entry after the packet was already sent to the output routine because its state might not match what's expected. This patch changes the xfrm lookup in POST_ROUTING to re-use the original dst_entry without routing the packet again. This means no policy routing can be used for transport mode transforms (which keep the original route) when packets are SNATed to match the policy, but it looks like the best we can do for now. Signed-off-by: Patrick McHardy Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index fdc4a9527343..43c09d790b83 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -79,7 +79,7 @@ enum nf_ip_hook_priorities { #ifdef __KERNEL__ extern int ip_route_me_harder(struct sk_buff **pskb); - +extern int ip_xfrm_me_harder(struct sk_buff **pskb); #endif /*__KERNEL__*/ #endif /*__LINUX_IP_NETFILTER_H*/ -- cgit v1.2.2 From 5ecfbae093f0c37311e89b29bfc0c9d586eace87 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 15 Feb 2006 22:50:10 +0300 Subject: [PATCH] fix zap_thread's ptrace related problems 1. The tracee can go from ptrace_stop() to do_signal_stop() after __ptrace_unlink(p). 2. It is unsafe to __ptrace_unlink(p) while p->parent may wait for tasklist_lock in ptrace_detach(). 
Signed-off-by: Oleg Nesterov Cc: Roland McGrath Cc: Ingo Molnar Cc: Christoph Hellwig Cc: Eric W. Biederman Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 9d5cd106b344..0d36750fc0f1 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -84,6 +84,7 @@ extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __us extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len); extern int ptrace_attach(struct task_struct *tsk); extern int ptrace_detach(struct task_struct *, unsigned int); +extern void __ptrace_detach(struct task_struct *, unsigned int); extern void ptrace_disable(struct task_struct *); extern int ptrace_check_attach(struct task_struct *task, int kill); extern int ptrace_request(struct task_struct *child, long request, long addr, long data); -- cgit v1.2.2 From 48d5cad87c3a4998d0bda16ccfb5c60dfe4de5fb Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 15 Feb 2006 15:10:22 -0800 Subject: [XFRM]: Fix SNAT-related crash in xfrm4_output_finish When a packet matching an IPsec policy is SNATed so it doesn't match any policy anymore it loses its xfrm bundle, which makes xfrm4_output_finish crash because of a NULL pointer dereference. This patch directs these packets to the original output path instead. Since the packets have already passed the POST_ROUTING hook, but need to start at the beginning of the original output path which includes another POST_ROUTING invocation, a flag is added to the IPCB to indicate that the packet was rerouted and doesn't need to pass the POST_ROUTING hook again. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 4cf6088625c1..3ca3d9ee78a9 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -184,8 +184,11 @@ static inline int nf_hook_thresh(int pf, unsigned int hook, struct sk_buff **pskb, struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *), int thresh) + int (*okfn)(struct sk_buff *), int thresh, + int cond) { + if (!cond) + return 1; #ifndef CONFIG_NETFILTER_DEBUG if (list_empty(&nf_hooks[pf][hook])) return 1; @@ -197,7 +200,7 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *)) { - return nf_hook_thresh(pf, hook, pskb, indev, outdev, okfn, INT_MIN); + return nf_hook_thresh(pf, hook, pskb, indev, outdev, okfn, INT_MIN, 1); } /* Activate hook; either okfn or kfree_skb called, unless a hook @@ -224,7 +227,13 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb, #define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \ ({int __ret; \ -if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, thresh)) == 1)\ +if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, thresh, 1)) == 1)\ + __ret = (okfn)(skb); \ +__ret;}) + +#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) \ +({int __ret; \ +if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, INT_MIN, cond)) == 1)\ __ret = (okfn)(skb); \ __ret;}) @@ -295,11 +304,13 @@ extern struct proc_dir_entry *proc_net_netfilter; #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) +#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb) static inline int nf_hook_thresh(int pf, unsigned int hook, struct sk_buff **pskb, struct net_device 
*indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *), int thresh) + int (*okfn)(struct sk_buff *), int thresh, + int cond) { return okfn(*pskb); } -- cgit v1.2.2 From 9c92d3486434e7310cb288587953e2dae4a79701 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 15 Feb 2006 15:18:19 -0800 Subject: [NETFILTER]: Don't invoke okfn in CONFIG_NETFILTER=n variant of nf_hook() nf_hook() is supposed to call the netfilter hook and return control of the packet back to the caller in case it may pass, the okfn is only used for queueing. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 3ca3d9ee78a9..468896939843 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -318,7 +318,7 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *)) { - return okfn(*pskb); + return 1; } static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} struct flowi; -- cgit v1.2.2 From b2ee9dbfad14ba8e34a589d552ddc67300a26bec Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Wed, 15 Feb 2006 15:17:40 -0800 Subject: [PATCH] hrtimer: fix multiple macro argument expansion For two macros the arguments were expanded twice, change them to inline functions to avoid it. 
Signed-off-by: Roman Zippel Acked-by: Ingo Molnar Acked-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ktime.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 6aca67a569a2..f3dec45ef874 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -96,10 +96,16 @@ static inline ktime_t ktime_set(const long secs, const unsigned long nsecs) ({ (ktime_t){ .tv64 = (kt).tv64 + (nsval) }; }) /* convert a timespec to ktime_t format: */ -#define timespec_to_ktime(ts) ktime_set((ts).tv_sec, (ts).tv_nsec) +static inline ktime_t timespec_to_ktime(struct timespec ts) +{ + return ktime_set(ts.tv_sec, ts.tv_nsec); +} /* convert a timeval to ktime_t format: */ -#define timeval_to_ktime(tv) ktime_set((tv).tv_sec, (tv).tv_usec * 1000) +static inline ktime_t timeval_to_ktime(struct timeval tv) +{ + return ktime_set(tv.tv_sec, tv.tv_usec * NSEC_PER_USEC); +} /* Map the ktime_t to timespec conversion to ns_to_timespec function */ #define ktime_to_timespec(kt) ns_to_timespec((kt).tv64) -- cgit v1.2.2 From a62eaf151d9cb478d127cfbc2e93c498869785b0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 16 Feb 2006 23:41:58 +0100 Subject: [PATCH] x86_64: Add boot option to disable randomized mappings and cleanup AMD SimNow!'s JIT doesn't like them at all in the guest. For distribution installation it's easiest if it's a boot time option. Also I moved the variable to a more appropriate place, made it independent from sysctl, and marked it __read_mostly, which it is. 
Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 6 ------ include/linux/mm.h | 2 ++ 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index b49affa0ac5a..3b507bf05d09 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -326,12 +326,6 @@ struct sysinfo { /* Force a compilation error if condition is true */ #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) -#ifdef CONFIG_SYSCTL -extern int randomize_va_space; -#else -#define randomize_va_space 1 -#endif - /* Trap pasters of __FUNCTION__ at compile-time */ #define __FUNCTION__ (__func__) diff --git a/include/linux/mm.h b/include/linux/mm.h index 75e9f0724997..26e1663a5cbe 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1051,5 +1051,7 @@ int shrink_slab(unsigned long scanned, gfp_t gfp_mask, void drop_pagecache(void); void drop_slab(void); +extern int randomize_va_space; + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ -- cgit v1.2.2 From 726c14bf499e91e7ede4f1728830aba05c675061 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 17 Feb 2006 10:30:23 +1100 Subject: [PATCH] Provide an interface for getting the current tick length This provides an interface for arch code to find out how many nanoseconds are going to be added on to xtime by the next call to do_timer. The value returned is a fixed-point number in 52.12 format in nanoseconds. The reason for this format is that it gives the full precision that the timekeeping code is using internally. The motivation for this is to fix a problem that has arisen on 32-bit powerpc in that the value returned by do_gettimeofday drifts apart from xtime if NTP is being used. PowerPC is now using a lockless do_gettimeofday based on reading the timebase register and performing some simple arithmetic. (This method of getting the time is also exported to userspace via the VDSO.) 
However, the factor and offset it uses were calculated based on the nominal tick length and weren't being adjusted when NTP varied the tick length. Note that 64-bit powerpc has had the lockless do_gettimeofday for a long time now. It also had an extremely hairy routine that got called from the 32-bit compat routine for adjtimex, which adjusted the factor and offset according to what it thought the timekeeping code was going to do. Not only was this only called if a 32-bit task did adjtimex (i.e. not if a 64-bit task did adjtimex), it was also duplicating computations from kernel/timer.c and it wasn't clear that it was (still) correct. The simple solution is to ask the timekeeping code how long the current jiffy will be on each timer interrupt, after calling do_timer. If this jiffy will be a different length from the last one, we then need to compute new values for the factor and offset used in the lockless do_gettimeofday. In this way we can keep xtime and do_gettimeofday in sync, even when NTP is varying the tick length. Note that when adjtimex varies the tick length, it almost always introduces the variation from the next tick on. The only case I could see where adjtimex would vary the length of the current tick is when an old-style adjtime adjustment is being cancelled. (It's not clear to me why the adjustment has to be cancelled immediately rather than from the next tick on.) Thus I don't see any real need for a hook in adjtimex; the rare case of an old-style adjustment being cancelled can be fixed up at the next tick. 
Signed-off-by: Paul Mackerras Acked-by: john stultz Signed-off-by: Linus Torvalds --- include/linux/timex.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timex.h b/include/linux/timex.h index 04a4a8cb4ed3..b7ca1204e42a 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -345,6 +345,9 @@ time_interpolator_reset(void) #endif /* !CONFIG_TIME_INTERPOLATION */ +/* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */ +extern u64 current_tick_length(void); + #endif /* KERNEL */ #endif /* LINUX_TIMEX_H */ -- cgit v1.2.2 From cc1887f3d8ae8ea61efa1a75af8ec0467b9dd546 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 20 Feb 2006 23:48:38 +0900 Subject: [PATCH] libata: fix qc->n_elem == 0 case handling in ata_qc_next_sg This patch makes ata_for_each_sg() start with pad_sgent when qc->n_elem is zero. Previously, ata_for_each_sg() unconditionally started with qc->__sg, handling the first sg to fill_sg() routines even when the entry was invalid. And while at it, unwind ?: in ata_qc_next_sg() into if statement. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 9e5db2949c58..c91be5e64ede 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -556,6 +556,16 @@ ata_sg_is_last(struct scatterlist *sg, struct ata_queued_cmd *qc) return 0; } +static inline struct scatterlist * +ata_qc_first_sg(struct ata_queued_cmd *qc) +{ + if (qc->n_elem) + return qc->__sg; + if (qc->pad_len) + return &qc->pad_sgent; + return NULL; +} + static inline struct scatterlist * ata_qc_next_sg(struct scatterlist *sg, struct ata_queued_cmd *qc) { @@ -563,11 +573,13 @@ ata_qc_next_sg(struct scatterlist *sg, struct ata_queued_cmd *qc) return NULL; if (++sg - qc->__sg < qc->n_elem) return sg; - return qc->pad_len ? 
&qc->pad_sgent : NULL; + if (qc->pad_len) + return &qc->pad_sgent; + return NULL; } #define ata_for_each_sg(sg, qc) \ - for (sg = qc->__sg; sg; sg = ata_qc_next_sg(sg, qc)) + for (sg = ata_qc_first_sg(qc); sg; sg = ata_qc_next_sg(sg, qc)) static inline unsigned int ata_tag_valid(unsigned int tag) { -- cgit v1.2.2 From 9b0f8b040acd8dfd23860754c0d09ff4f44e2cbc Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 20 Feb 2006 18:27:52 -0800 Subject: [PATCH] Terminate process that fails on a constrained allocation Some allocations are restricted to a limited set of nodes (due to memory policies or cpuset constraints). If the page allocator is not able to find enough memory then that does not mean that overall system memory is low. In particular going postal and more or less randomly shooting at processes is not likely going to help the situation but may just lead to suicide (the whole system coming down). It is better to signal to the process that no memory exists given the constraints that the process (or the configuration of the process) has placed on the allocation behavior. The process may be killed but then the sysadmin or developer can investigate the situation. The solution is similar to what we do when running out of hugepages. This patch adds a check before we kill processes. At that point performance considerations do not matter much so we just scan the zonelist and reconstruct a list of nodes. If the list of nodes does not contain all online nodes then this is a constrained allocation and we should kill the current process. 
Signed-off-by: Christoph Lameter Cc: Nick Piggin Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index f3e17d5963c3..d572b19afb7d 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -147,7 +147,7 @@ struct swap_list_t { #define vm_swap_full() (nr_swap_pages*2 < total_swap_pages) /* linux/mm/oom_kill.c */ -extern void out_of_memory(gfp_t gfp_mask, int order); +extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order); /* linux/mm/memory.c */ extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *); -- cgit v1.2.2 From c255d844dd73616f23e4b4733edcc2e5fa4042b2 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Mon, 20 Feb 2006 18:27:58 -0800 Subject: [PATCH] suspend-to-ram: allow video options to be set at runtime Currently, acpi video options can only be set on the kernel command line. That's a little inflexible; I'd like a userland s2ram application that just works, and modifying the kernel command line according to a whitelist is not fun. It is better to just allow the s2ram application to set video options just before suspend (according to the whitelist). This implements a sysctl to allow setting suspend video options without reboot. (akpm: Documentation updates for this new sysctl are pending..) Signed-off-by: Pavel Machek Cc: "Brown, Len" Cc: "Antonino A. 
Daplas" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/acpi.h | 3 ++- include/linux/sysctl.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 84d3d9f034ce..d3bc25e6d27d 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -427,7 +427,8 @@ extern int acpi_mp_config; extern struct acpi_table_mcfg_config *pci_mmcfg_config; extern int pci_mmcfg_config_num; -extern int sbf_port ; +extern int sbf_port; +extern unsigned long acpi_video_flags; #else /* !CONFIG_ACPI */ diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 32a4139c4ad8..0e92bf7ec28e 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -146,6 +146,7 @@ enum KERN_RANDOMIZE=68, /* int: randomize virtual address space */ KERN_SETUID_DUMPABLE=69, /* int: behaviour of dumps for setuid core */ KERN_SPIN_RETRY=70, /* int: number of spinlock retries */ + KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */ }; -- cgit v1.2.2 From 7a9166e3b037296366cea6f3c97f705d33e209e6 Mon Sep 17 00:00:00 2001 From: Luke Yang Date: Mon, 20 Feb 2006 18:28:07 -0800 Subject: [PATCH] Fix undefined symbols for nommu architecture Signed-off-by: Luke Yang Acked-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 26e1663a5cbe..498ff8778fb6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1051,7 +1051,11 @@ int shrink_slab(unsigned long scanned, gfp_t gfp_mask, void drop_pagecache(void); void drop_slab(void); +#ifndef CONFIG_MMU +#define randomize_va_space 0 +#else extern int randomize_va_space; +#endif #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ -- cgit v1.2.2 From 7fd105e758c8d746d57ab7e77f100e096bf153c8 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: 
Mon, 20 Feb 2006 18:28:08 -0800 Subject: [PATCH] Fix compile for CONFIG_SYSVIPC=n or CONFIG_SYSCTL=n The compat syscalls are added to sys_ni.c since they are not defined if the above CONFIG options are off. Also, nfs would not build with CONFIG_SYSCTL off. Noticed by Arthur Othieno. Signed-off-by: Stephen Rothwell Cc: "David S. Miller" Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 547d649b274e..b4dc6e2e10c9 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -398,7 +398,7 @@ extern struct inode_operations nfs_symlink_inode_operations; extern int nfs_register_sysctl(void); extern void nfs_unregister_sysctl(void); #else -#define nfs_register_sysctl() do { } while(0) +#define nfs_register_sysctl() 0 #define nfs_unregister_sysctl() do { } while(0) #endif -- cgit v1.2.2 From 5bd546aa78b5d74f3162815e41940f862215d9e3 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 17 Feb 2006 20:23:29 +0000 Subject: [MMC] Fix mmc_cmd_type() mask It's MMC_CMD_MASK not MMC_CMD_TYPE. Signed-off-by: Russell King --- include/linux/mmc/mmc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index f38872abc126..bdc556d88498 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -49,7 +49,7 @@ struct mmc_command { /* * These are the command types. 
*/ -#define mmc_cmd_type(cmd) ((cmd)->flags & MMC_CMD_TYPE) +#define mmc_cmd_type(cmd) ((cmd)->flags & MMC_CMD_MASK) unsigned int retries; /* max number of retries */ unsigned int error; /* command error */ -- cgit v1.2.2 From fa675765afed59bb89adba3369094ebd428b930b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 22 Feb 2006 09:39:02 -0800 Subject: Revert mount/umount uevent removal This change reverts the 033b96fd30db52a710d97b06f87d16fc59fee0f1 commit from Kay Sievers that removed the mount/umount uevents from the kernel. Some older versions of HAL still depend on these events to detect when a new device has been mounted. These events are not correctly emitted, and are broken by design, and so, should not be relied upon by any future program. Instead, the /proc/mounts file should be polled to properly detect this kind of event. A feature-removal-schedule.txt entry has been added, noting when this interface will be removed from the kernel. Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 2a8d8da70961..c374b5fa8d3b 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -41,8 +41,10 @@ enum kobject_action { KOBJ_ADD = (__force kobject_action_t) 0x01, /* exclusive to core */ KOBJ_REMOVE = (__force kobject_action_t) 0x02, /* exclusive to core */ KOBJ_CHANGE = (__force kobject_action_t) 0x03, /* device state change */ - KOBJ_OFFLINE = (__force kobject_action_t) 0x04, /* device offline */ - KOBJ_ONLINE = (__force kobject_action_t) 0x05, /* device online */ + KOBJ_MOUNT = (__force kobject_action_t) 0x04, /* mount event for block devices (broken) */ + KOBJ_UMOUNT = (__force kobject_action_t) 0x05, /* umount event for block devices (broken) */ + KOBJ_OFFLINE = (__force kobject_action_t) 0x06, /* device offline */ + KOBJ_ONLINE = (__force kobject_action_t) 0x07, /* device online 
*/ }; struct kobject { -- cgit v1.2.2 From 85edae14e4ee5e68cf037e9e4bca7498ea16874d Mon Sep 17 00:00:00 2001 From: Michal Janusz Miroslaw Date: Thu, 23 Feb 2006 09:49:35 +0000 Subject: [SERIAL] Trivial comment fix: include/linux/serial_reg.h Trivial comment fix for include/linux/serial_reg.h Signed-off-by: Russell King --- include/linux/serial_reg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_reg.h b/include/linux/serial_reg.h index 6a2bb955844b..3c8a6aa77415 100644 --- a/include/linux/serial_reg.h +++ b/include/linux/serial_reg.h @@ -247,10 +247,10 @@ #define UART_CTR 0xFF /* - * The 16C950 Additional Control Reigster + * The 16C950 Additional Control Register */ #define UART_ACR_RXDIS 0x01 /* Receiver disable */ -#define UART_ACR_TXDIS 0x02 /* Receiver disable */ +#define UART_ACR_TXDIS 0x02 /* Transmitter disable */ #define UART_ACR_DSRFC 0x04 /* DSR Flow Control */ #define UART_ACR_TLENB 0x20 /* 950 trigger levels enable */ #define UART_ACR_ICRRD 0x40 /* ICR Read enable */ -- cgit v1.2.2 From c04030e16dbea2f7581f82cc6688695927f6ac5b Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Fri, 24 Feb 2006 13:04:21 -0800 Subject: [PATCH] flags parameter for linkat I'm currently at the POSIX meeting and one thing covered was the incompatibility of Linux's link() with the POSIX definition. The name. Linux does not follow symlinks, POSIX requires it does. Even if somebody thinks this is a good default behavior we cannot change this because it would break the ABI. But the fact remains that some application might want this behavior. We have one chance to help implementing this without breaking the behavior. For this we could use the new linkat interface which would need a new flags parameter. If the new parameter is AT_SYMLINK_FOLLOW the new behavior could be invoked. I do not want to introduce such a patch now. But we could add the parameter now, just don't use it. The patch below would do this. 
Can we get this late patch applied before the release more or less fixes the syscall API? Signed-off-by: Ulrich Drepper Signed-off-by: Ralf Baechle Cc: Heiko Carstens Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index d73501ba7e44..b9ea44ac0ddb 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -543,7 +543,7 @@ asmlinkage long sys_unlinkat(int dfd, const char __user * pathname, int flag); asmlinkage long sys_symlinkat(const char __user * oldname, int newdfd, const char __user * newname); asmlinkage long sys_linkat(int olddfd, const char __user *oldname, - int newdfd, const char __user *newname); + int newdfd, const char __user *newname, int flags); asmlinkage long sys_renameat(int olddfd, const char __user * oldname, int newdfd, const char __user * newname); asmlinkage long sys_futimesat(int dfd, char __user *filename, -- cgit v1.2.2 From bafac2a512bf4fd2ce7520f3976ce8aab4435f74 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 27 Feb 2006 13:04:17 -0800 Subject: [NETFILTER]: Restore {ipt,ip6t,ebt}_LOG compatibility The nfnetlink_log infrastructure changes broke compatibility of the LOG targets. They currently use whatever log backend was registered first, which means that if ipt_ULOG was loaded first, no messages will be printed to the ring buffer anymore. Restore compatibility by using the old log functions by default and only use the nf_log backend if the user explicitly said so. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter_bridge/ebt_log.h | 1 + include/linux/netfilter_ipv4/ipt_LOG.h | 3 ++- include/linux/netfilter_ipv6/ip6t_LOG.h | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebt_log.h b/include/linux/netfilter_bridge/ebt_log.h index 358fbc84fb59..96e231ae7554 100644 --- a/include/linux/netfilter_bridge/ebt_log.h +++ b/include/linux/netfilter_bridge/ebt_log.h @@ -3,6 +3,7 @@ #define EBT_LOG_IP 0x01 /* if the frame is made by ip, log the ip information */ #define EBT_LOG_ARP 0x02 +#define EBT_LOG_NFLOG 0x04 #define EBT_LOG_MASK (EBT_LOG_IP | EBT_LOG_ARP) #define EBT_LOG_PREFIX_SIZE 30 #define EBT_LOG_WATCHER "log" diff --git a/include/linux/netfilter_ipv4/ipt_LOG.h b/include/linux/netfilter_ipv4/ipt_LOG.h index 22d16177319b..892f9a33fea8 100644 --- a/include/linux/netfilter_ipv4/ipt_LOG.h +++ b/include/linux/netfilter_ipv4/ipt_LOG.h @@ -6,7 +6,8 @@ #define IPT_LOG_TCPOPT 0x02 /* Log TCP options */ #define IPT_LOG_IPOPT 0x04 /* Log IP options */ #define IPT_LOG_UID 0x08 /* Log UID owning local socket */ -#define IPT_LOG_MASK 0x0f +#define IPT_LOG_NFLOG 0x10 /* Log using nf_log backend */ +#define IPT_LOG_MASK 0x1f struct ipt_log_info { unsigned char level; diff --git a/include/linux/netfilter_ipv6/ip6t_LOG.h b/include/linux/netfilter_ipv6/ip6t_LOG.h index 9008ff5c40ae..060c1a1c6c60 100644 --- a/include/linux/netfilter_ipv6/ip6t_LOG.h +++ b/include/linux/netfilter_ipv6/ip6t_LOG.h @@ -6,7 +6,8 @@ #define IP6T_LOG_TCPOPT 0x02 /* Log TCP options */ #define IP6T_LOG_IPOPT 0x04 /* Log IP options */ #define IP6T_LOG_UID 0x08 /* Log UID owning local socket */ -#define IP6T_LOG_MASK 0x0f +#define IP6T_LOG_NFLOG 0x10 /* Log using nf_log backend */ +#define IP6T_LOG_MASK 0x1f struct ip6t_log_info { unsigned char level; -- cgit v1.2.2 From d2b176ed878d4d5fcc0bd35656dfd373f3702af9 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Tue, 28 Feb 2006 09:42:23 -0800 Subject: [IA64] 
sysctl option to silence unaligned trap warnings Allow sysadmin to disable all warnings about userland apps making unaligned accesses by using: # echo 1 > /proc/sys/kernel/ignore-unaligned-usertrap Rather than having to use prctl on a process by process basis. Default behaviour leaves the warnings enabled. Signed-off-by: Jes Sorensen Signed-off-by: Tony Luck --- include/linux/sysctl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 0e92bf7ec28e..bac61db26456 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -147,6 +147,7 @@ enum KERN_SETUID_DUMPABLE=69, /* int: behaviour of dumps for setuid core */ KERN_SPIN_RETRY=70, /* int: number of spinlock retries */ KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */ + KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */ }; -- cgit v1.2.2 From 0551fbd29e16fccd46e41b7d01bf0f8f39b14212 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 28 Feb 2006 16:59:19 -0800 Subject: [PATCH] Add mm->task_size and fix powerpc vdso This patch adds mm->task_size to keep track of the task size of a given mm and uses that to fix the powerpc vdso so that it uses the mm task size to decide what pages to fault in instead of the current thread flags (which broke when ptracing). (akpm: I expect that mm_struct.task_size will become the way in which we finally sort out the confusion between 32-bit processes and 32-bit mm's. It may need tweaks, but at this stage this patch is powerpc-only.) 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b6f51e3a38ec..ff2e09c953b9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -298,8 +298,9 @@ struct mm_struct { unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); void (*unmap_area) (struct mm_struct *mm, unsigned long addr); - unsigned long mmap_base; /* base of mmap area */ - unsigned long cached_hole_size; /* if non-zero, the largest hole below free_area_cache */ + unsigned long mmap_base; /* base of mmap area */ + unsigned long task_size; /* size of task vm space */ + unsigned long cached_hole_size; /* if non-zero, the largest hole below free_area_cache */ unsigned long free_area_cache; /* first hole of size cached_hole_size or larger */ pgd_t * pgd; atomic_t mm_users; /* How many users with user space? */ -- cgit v1.2.2 From 3af1efe8a301f5b1c813f5f761cb1e10d6175605 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 2 Mar 2006 13:25:26 -0500 Subject: [PATCH] reiserfs: fix unaligned bitmap usage The bitmaps associated with generation numbers for directory entries are declared as an array of ints. On some platforms, this causes alignment exceptions. The following patch uses the standard bitmap declaration macros to declare the bitmaps, fixing the problem. Originally from Takashi Iwai. 
Signed-off-by: Takashi Iwai Acked-by: Jeff Mahoney Signed-off-by: Linus Torvalds --- include/linux/reiserfs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 7d51149bd793..dad78cecfd20 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -1052,7 +1052,7 @@ struct reiserfs_dir_entry { int de_entrylen; int de_namelen; char *de_name; - char *de_gen_number_bit_string; + unsigned long *de_gen_number_bit_string; __u32 de_dir_id; __u32 de_objectid; -- cgit v1.2.2 From 1e4b27df55166ce3b276f55bab223fa4ae8c5525 Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Mon, 6 Mar 2006 15:42:37 -0800 Subject: [PATCH] i4l: add new PCI IDs for HFC-S PCI Add new PCI IDs for HFC-S PCI based ISDN TA 'Primux II S0' and 'Primux II S0' from Gerdes AG Signed-off-by: Martin Bachem Signed-off-by: Karsten Keil Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 82b83da25d77..1709b5009d2e 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1752,6 +1752,8 @@ #define PCI_DEVICE_ID_CCD_B00B 0xb00b #define PCI_DEVICE_ID_CCD_B00C 0xb00c #define PCI_DEVICE_ID_CCD_B100 0xb100 +#define PCI_DEVICE_ID_CCD_B700 0xb700 +#define PCI_DEVICE_ID_CCD_B701 0xb701 #define PCI_VENDOR_ID_EXAR 0x13a8 #define PCI_DEVICE_ID_EXAR_XR17C152 0x0152 -- cgit v1.2.2 From 69239749e1ac4f3496906aa4267cb9f61ce52c9c Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 6 Mar 2006 15:42:45 -0800 Subject: [PATCH] fix next_timer_interrupt() for hrtimer Also from Thomas Gleixner Function next_timer_interrupt() got broken with a recent patch 6ba1b91213e81aa92b5cf7539f7d2a94ff54947c as sys_nanosleep() was moved to hrtimer. This broke things as next_timer_interrupt() did not check hrtimer tree for next event. 
Function next_timer_interrupt() is needed with dyntick (CONFIG_NO_IDLE_HZ, VST) implementations, as the system can be in idle when next hrtimer event was supposed to happen. At least ARM and S390 currently use next_timer_interrupt(). Signed-off-by: Thomas Gleixner Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hrtimer.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 6361544bb6ae..6401c31d6add 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -116,6 +116,10 @@ extern int hrtimer_try_to_cancel(struct hrtimer *timer); extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer); extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp); +#ifdef CONFIG_NO_IDLE_HZ +extern ktime_t hrtimer_get_next_event(void); +#endif + static inline int hrtimer_active(const struct hrtimer *timer) { return timer->state == HRTIMER_PENDING; -- cgit v1.2.2 From 78679302fe428f4f3dc853a51ee24f306010d874 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 6 Mar 2006 15:42:49 -0800 Subject: [PATCH] memory-hotplug compile fix include/linux/memory_hotplug.h:53: warning: 'struct page' declared inside parameter list (akpm: I tossed in a couple more possibly-needed-sometime struct decls too) Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 01f03bc06eff..968b1aa3732c 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -6,6 +6,10 @@ #include #include +struct page; +struct zone; +struct pglist_data; + #ifdef CONFIG_MEMORY_HOTPLUG /* * pgdat resizing functions -- cgit v1.2.2 From 
a615fa83959896f8eac76c235953fb164cd1a9b9 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Mon, 6 Mar 2006 15:42:50 -0800 Subject: [PATCH] Increase max kmalloc size for very large systems Systems with extremely large numbers of nodes or cpus need to kmalloc structures larger than is currently supported. This patch increases the maximum supported size for very large systems. This patch should have no effect on current systems. (akpm: why not just use alloc_pages() for sysfs_cpus?) Signed-off-by: Jack Steiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmalloc_sizes.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kmalloc_sizes.h b/include/linux/kmalloc_sizes.h index d82d4c05c12d..bda23e00ed71 100644 --- a/include/linux/kmalloc_sizes.h +++ b/include/linux/kmalloc_sizes.h @@ -19,8 +19,10 @@ CACHE(32768) CACHE(65536) CACHE(131072) -#ifndef CONFIG_MMU +#if (NR_CPUS > 512) || (MAX_NUMNODES > 256) || !defined(CONFIG_MMU) CACHE(262144) +#endif +#ifndef CONFIG_MMU CACHE(524288) CACHE(1048576) #ifdef CONFIG_LARGE_ALLOCS -- cgit v1.2.2 From a19cbd4bf258840ade3b6ee9e9256006d0644e09 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 8 Mar 2006 14:03:09 -0800 Subject: Mark the pipe file operations static They aren't used (nor even really usable) outside of pipe.c anyway Signed-off-by: Linus Torvalds --- include/linux/fs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index e059da947007..0cc34b1c42c9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1418,9 +1418,6 @@ extern int is_bad_inode(struct inode *); extern struct file_operations read_fifo_fops; extern struct file_operations write_fifo_fops; extern struct file_operations rdwr_fifo_fops; -extern struct file_operations read_pipe_fops; -extern struct file_operations write_pipe_fops; -extern struct file_operations rdwr_pipe_fops; extern int
fs_may_remount_ro(struct super_block *); -- cgit v1.2.2 From e2bab3d92486fb781f4d06f56339264ed1492392 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 7 Mar 2006 21:55:31 -0800 Subject: [PATCH] percpu_counter_sum() Implement percpu_counter_sum(). This is a more accurate but slower version of percpu_counter_read_positive(). We need this for Alex's speedup-ext3_statfs patch and for the nr_file accounting fix. Otherwise these things would be too inaccurate on large CPU counts. Cc: Ravikiran G Thirumalai Cc: Alex Tomas Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu_counter.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index bd6708e2c027..682525511c9e 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -39,6 +39,7 @@ static inline void percpu_counter_destroy(struct percpu_counter *fbc) } void percpu_counter_mod(struct percpu_counter *fbc, long amount); +long percpu_counter_sum(struct percpu_counter *fbc); static inline long percpu_counter_read(struct percpu_counter *fbc) { @@ -92,6 +93,11 @@ static inline long percpu_counter_read_positive(struct percpu_counter *fbc) return fbc->count; } +static inline long percpu_counter_sum(struct percpu_counter *fbc) +{ + return percpu_counter_read_positive(fbc); +} + #endif /* CONFIG_SMP */ static inline void percpu_counter_inc(struct percpu_counter *fbc) -- cgit v1.2.2 From 21a1ea9eb40411d4ee29448c53b9e4c0654d6ceb Mon Sep 17 00:00:00 2001 From: Dipankar Sarma Date: Tue, 7 Mar 2006 21:55:33 -0800 Subject: [PATCH] rcu batch tuning This patch adds new tunables for RCU queue and finished batches. There are two types of controls - number of completed RCU updates invoked in a batch (blimit) and monitoring for high rate of incoming RCUs on a cpu (qhimark, qlowmark). By default, the per-cpu batch limit is set to a small value. 
If the input RCU rate exceeds the high watermark, we do two things - force quiescent state on all cpus and set the batch limit of the CPU to INTMAX. Setting batch limit to INTMAX forces all finished RCUs to be processed in one shot. If we have more than INTMAX RCUs queued up, then we have bigger problems anyway. Once the incoming queued RCUs fall below the low watermark, the batch limit is set to the default. Signed-off-by: Dipankar Sarma Cc: "Paul E. McKenney" Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rcupdate.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index b87aefa082e2..c2ec6c77874e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -98,13 +98,17 @@ struct rcu_data { long batch; /* Batch # for current RCU batch */ struct rcu_head *nxtlist; struct rcu_head **nxttail; - long count; /* # of queued items */ + long qlen; /* # of queued callbacks */ struct rcu_head *curlist; struct rcu_head **curtail; struct rcu_head *donelist; struct rcu_head **donetail; + long blimit; /* Upper limit on a processed batch */ int cpu; struct rcu_head barrier; +#ifdef CONFIG_SMP + long last_rs_qlen; /* qlen during the last resched */ +#endif }; DECLARE_PER_CPU(struct rcu_data, rcu_data); -- cgit v1.2.2 From 529bf6be5c04f2e869d07bfdb122e9fd98ade714 Mon Sep 17 00:00:00 2001 From: Dipankar Sarma Date: Tue, 7 Mar 2006 21:55:35 -0800 Subject: [PATCH] fix file counting I have benchmarked this on an x86_64 NUMA system and see no significant performance difference on kernbench. Tested on both x86_64 and powerpc. The way we do file struct accounting is not very suitable for batched freeing. For scalability reasons, file accounting was constructor/destructor based. This meant that nr_files was decremented only when the object was removed from the slab cache. This is susceptible to slab fragmentation. 
With RCU based file structure, consequent batched freeing and a test program like Serge's, we just speed this up and end up with a very fragmented slab - llm22:~ # cat /proc/sys/fs/file-nr 587730 0 758844 At the same time, I see only 2000+ objects in filp cache. The following patch fixes this problem. This patch changes the file counting by removing the filp_count_lock. Instead we use a separate percpu counter, nr_files, for now and all accesses to it are through get_nr_files() api. In the sysctl handler for nr_files, we populate files_stat.nr_files before returning to user. Counting files as and when they are created and destroyed (as opposed to inside slab) allows us to correctly count open files with RCU. Signed-off-by: Dipankar Sarma Cc: "Paul E. McKenney" Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/file.h | 2 -- include/linux/fs.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/file.h b/include/linux/file.h index 418b6101b59a..9901b850f2e4 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -60,8 +60,6 @@ extern void put_filp(struct file *); extern int get_unused_fd(void); extern void FASTCALL(put_unused_fd(unsigned int fd)); struct kmem_cache; -extern void filp_ctor(void * objp, struct kmem_cache *cachep, unsigned long cflags); -extern void filp_dtor(void * objp, struct kmem_cache *cachep, unsigned long dflags); extern struct file ** alloc_fd_array(int); extern void free_fd_array(struct file **, int); diff --git a/include/linux/fs.h b/include/linux/fs.h index 0cc34b1c42c9..51c0c93bdf93 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -35,6 +35,7 @@ struct files_stat_struct { int max_files; /* tunable */ }; extern struct files_stat_struct files_stat; +extern int get_max_files(void); struct inodes_stat_t { int nr_inodes; -- cgit v1.2.2 From 0ef675d491bd65028fa838015ebc6ce8abefab6f Mon Sep 17 00:00:00 2001 From: Atsushi
Nemoto Date: Thu, 9 Mar 2006 17:33:38 -0800 Subject: [PATCH] mtd: 64 bit fixes Fix some bugs in mtd/jffs2 on 64bit platform. The MEMGETBADBLOCK/MEMSETBADBLOCK ioctl are not listed in compat_ioctl.h. And some variables in jffs2 are declared as uint32_t but used to hold size_t values. Signed-off-by: Atsushi Nemoto Cc: Thomas Gleixner Acked-by: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compat_ioctl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h index 8fad50f8e389..ae7dfb790df3 100644 --- a/include/linux/compat_ioctl.h +++ b/include/linux/compat_ioctl.h @@ -696,6 +696,8 @@ COMPATIBLE_IOCTL(MEMLOCK) COMPATIBLE_IOCTL(MEMUNLOCK) COMPATIBLE_IOCTL(MEMGETREGIONCOUNT) COMPATIBLE_IOCTL(MEMGETREGIONINFO) +COMPATIBLE_IOCTL(MEMGETBADBLOCK) +COMPATIBLE_IOCTL(MEMSETBADBLOCK) /* NBD */ ULONG_IOCTL(NBD_SET_SOCK) ULONG_IOCTL(NBD_SET_BLKSIZE) -- cgit v1.2.2 From 8fce4d8e3b9e3cf47cc8afeb6077e22ab795d989 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 9 Mar 2006 17:33:54 -0800 Subject: [PATCH] slab: Node rotor for freeing alien caches and remote per cpu pages. The cache reaper currently tries to free all alien caches and all remote per cpu pages in each pass of cache_reap. For machines with a large number of nodes (such as Altix) this may lead to sporadic delays of around ~10ms. Interrupts are disabled while reclaiming creating unacceptable delays. This patch changes that behavior by adding a per cpu reap_node variable. Instead of attempting to free all caches, we free only one alien cache and the per cpu pages from one remote node. That reduces the time spent in cache_reap. However, doing so will lengthen the time it takes to completely drain all remote per cpu pagesets and all alien caches. The time needed will grow with the number of nodes in the system. All caches are drained when they overflow their respective capacity.
So the drawback here is only that a bit of memory may be wasted for awhile longer. Details: 1. Rename drain_remote_pages to drain_node_pages to allow the specification of the node to drain of pcp pages. 2. Add additional functions init_reap_node, next_reap_node for NUMA that manage a per cpu reap_node counter. 3. Add a reap_alien function that reaps only from the current reap_node. For us this seems to be a critical issue. Holdoffs of an average of ~7ms cause some HPC benchmarks to slow down significantly. F.e. NAS parallel slows down dramatically. NAS parallel has a 12-16 seconds runtime w/o rotor compared to 5.8 secs with the rotor patches. It gets down to 5.05 secs with the additional interrupt holdoff reductions. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 20f9148e38d9..7851e6b520cf 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -157,9 +157,9 @@ extern void FASTCALL(free_cold_page(struct page *page)); void page_alloc_init(void); #ifdef CONFIG_NUMA -void drain_remote_pages(void); +void drain_node_pages(int node); #else -static inline void drain_remote_pages(void) { }; +static inline void drain_node_pages(int node) { }; #endif #endif /* __LINUX_GFP_H */ -- cgit v1.2.2 From 0adb25d2e71ab047423d6fc63d5d184590d0a66f Mon Sep 17 00:00:00 2001 From: Kirill Korotaev Date: Sat, 11 Mar 2006 03:27:13 -0800 Subject: [PATCH] ext3: ext3_symlink should use GFP_NOFS allocations inside This patch fixes illegal __GFP_FS allocation inside ext3 transaction in ext3_symlink(). Such allocation may re-enter ext3 code from try_to_free_pages. But JBD/ext3 code keeps a pointer to current journal handle in task_struct and, hence, is not reentrable. This bug led to "Assertion failure in journal_dirty_metadata()" messages. 
http://bugzilla.openvz.org/show_bug.cgi?id=115 Signed-off-by: Andrey Savochkin Signed-off-by: Kirill Korotaev Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 51c0c93bdf93..128d0082522c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1664,6 +1664,8 @@ extern int vfs_follow_link(struct nameidata *, const char *); extern int page_readlink(struct dentry *, char __user *, int); extern void *page_follow_link_light(struct dentry *, struct nameidata *); extern void page_put_link(struct dentry *, struct nameidata *, void *); +extern int __page_symlink(struct inode *inode, const char *symname, int len, + gfp_t gfp_mask); extern int page_symlink(struct inode *inode, const char *symname, int len); extern struct inode_operations page_symlink_inode_operations; extern int generic_readlink(struct dentry *, char __user *, int); -- cgit v1.2.2 From 7cd9013be6c22f3ff6f777354f766c8c0b955e17 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 11 Mar 2006 03:27:18 -0800 Subject: [PATCH] remove __put_task_struct_cb export again The patch '[PATCH] RCU signal handling' [1] added an export for __put_task_struct_cb, a put_task_struct helper newly introduced in that patch. But the put_task_struct couldn't be used modular previously as __put_task_struct wasn't exported. There are no callers of it in modular code, and it shouldn't be exported because we don't want drivers to hold references to task_structs. This patch removes the export and folds __put_task_struct into __put_task_struct_cb as there's no other caller. [1] http://www2.kernel.org/git/gitweb.cgi?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=e56d090310d7625ecb43a1eeebd479f04affb48b Signed-off-by: Christoph Hellwig Acked-by: Paul E.
McKenney Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ff2e09c953b9..62e6314382f0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -892,7 +892,6 @@ static inline int pid_alive(struct task_struct *p) } extern void free_task(struct task_struct *tsk); -extern void __put_task_struct(struct task_struct *tsk); #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) extern void __put_task_struct_cb(struct rcu_head *rhp); -- cgit v1.2.2