From 36d8593ec74dc04d3bd7c1c897a7b7cfbd0b0dc6 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Sat, 19 Feb 2011 15:34:50 +0000 Subject: Make clocksource name const As nothing should be writing to the clocksource name string, make the clocksource name pointer const. Build-tested on ARM Versatile Express. Signed-off-by: Russell King Signed-off-by: John Stultz --- include/linux/clocksource.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index c37b21ad5a3b..94c1f38e922a 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -161,7 +161,7 @@ struct clocksource { /* * First part of structure is read mostly */ - char *name; + const char *name; struct list_head list; int rating; cycle_t (*read)(struct clocksource *cs); -- cgit v1.2.2 From d430d3d7e646eb1eac2bb4aa244a644312e67c76 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Wed, 16 Mar 2011 17:29:47 -0400 Subject: jump label: Introduce static_branch() interface Introduce: static __always_inline bool static_branch(struct jump_label_key *key); instead of the old JUMP_LABEL(key, label) macro. In this way, jump labels become really easy to use: Define: struct jump_label_key jump_key; Can be used as: if (static_branch(&jump_key)) do unlikely code enable/disale via: jump_label_inc(&jump_key); jump_label_dec(&jump_key); that's it! For the jump labels disabled case, the static_branch() becomes an atomic_read(), and jump_label_inc()/dec() are simply atomic_inc(), atomic_dec() operations. We show testing results for this change below. Thanks to H. Peter Anvin for suggesting the 'static_branch()' construct. Since we now require a 'struct jump_label_key *key', we can store a pointer into the jump table addresses. In this way, we can enable/disable jump labels, in basically constant time. This change allows us to completely remove the previous hashtable scheme. Thanks to Peter Zijlstra for this re-write. Testing: I ran a series of 'tbench 20' runs 5 times (with reboots) for 3 configurations, where tracepoints were disabled. jump label configured in avg: 815.6 jump label *not* configured in (using atomic reads) avg: 800.1 jump label *not* configured in (regular reads) avg: 803.4 Signed-off-by: Peter Zijlstra LKML-Reference: <20110316212947.GA8792@redhat.com> Signed-off-by: Jason Baron Suggested-by: H. Peter Anvin Tested-by: David Daney Acked-by: Ralf Baechle Acked-by: David S. Miller Acked-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- include/asm-generic/vmlinux.lds.h | 14 ++---- include/linux/dynamic_debug.h | 2 - include/linux/jump_label.h | 89 ++++++++++++++++++++++++--------------- include/linux/jump_label_ref.h | 44 ------------------- include/linux/perf_event.h | 26 ++++++------ include/linux/tracepoint.h | 22 +++++----- 6 files changed, 83 insertions(+), 114 deletions(-) delete mode 100644 include/linux/jump_label_ref.h (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 32c45e5fe0ab..79522166d7f1 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -170,6 +170,10 @@ STRUCT_ALIGN(); \ *(__tracepoints) \ /* implement dynamic printk debug */ \ + . = ALIGN(8); \ + VMLINUX_SYMBOL(__start___jump_table) = .; \ + *(__jump_table) \ + VMLINUX_SYMBOL(__stop___jump_table) = .; \ . = ALIGN(8); \ VMLINUX_SYMBOL(__start___verbose) = .; \ *(__verbose) \ @@ -228,8 +232,6 @@ \ BUG_TABLE \ \ - JUMP_TABLE \ - \ /* PCI quirks */ \ .pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start_pci_fixups_early) = .; \ @@ -589,14 +591,6 @@ #define BUG_TABLE #endif -#define JUMP_TABLE \ - . = ALIGN(8); \ - __jump_table : AT(ADDR(__jump_table) - LOAD_OFFSET) { \ - VMLINUX_SYMBOL(__start___jump_table) = .; \ - *(__jump_table) \ - VMLINUX_SYMBOL(__stop___jump_table) = .; \ - } - #ifdef CONFIG_PM_TRACE #define TRACEDATA \ . = ALIGN(4); \ diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index 0c9653f11c18..e747ecd48e1c 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -1,8 +1,6 @@ #ifndef _DYNAMIC_DEBUG_H #define _DYNAMIC_DEBUG_H -#include - /* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They * use independent hash functions, to reduce the chance of false positives. diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 7880f18e4b86..83e745f3ead7 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -1,20 +1,43 @@ #ifndef _LINUX_JUMP_LABEL_H #define _LINUX_JUMP_LABEL_H +#include +#include + #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) + +struct jump_label_key { + atomic_t enabled; + struct jump_entry *entries; +#ifdef CONFIG_MODULES + struct jump_label_mod *next; +#endif +}; + # include # define HAVE_JUMP_LABEL #endif enum jump_label_type { + JUMP_LABEL_DISABLE = 0, JUMP_LABEL_ENABLE, - JUMP_LABEL_DISABLE }; struct module; #ifdef HAVE_JUMP_LABEL +#ifdef CONFIG_MODULES +#define JUMP_LABEL_INIT {{ 0 }, NULL, NULL} +#else +#define JUMP_LABEL_INIT {{ 0 }, NULL} +#endif + +static __always_inline bool static_branch(struct jump_label_key *key) +{ + return arch_static_branch(key); +} + extern struct jump_entry __start___jump_table[]; extern struct jump_entry __stop___jump_table[]; @@ -23,37 +46,37 @@ extern void jump_label_unlock(void); extern void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type); extern void arch_jump_label_text_poke_early(jump_label_t addr); -extern void jump_label_update(unsigned long key, enum jump_label_type type); -extern void jump_label_apply_nops(struct module *mod); extern int jump_label_text_reserved(void *start, void *end); +extern void jump_label_inc(struct jump_label_key *key); +extern void jump_label_dec(struct jump_label_key *key); +extern bool jump_label_enabled(struct jump_label_key *key); +extern void jump_label_apply_nops(struct module *mod); -#define jump_label_enable(key) \ - jump_label_update((unsigned long)key, JUMP_LABEL_ENABLE); +#else -#define jump_label_disable(key) \ - jump_label_update((unsigned long)key, JUMP_LABEL_DISABLE); +#include -#else +#define JUMP_LABEL_INIT {ATOMIC_INIT(0)} -#define JUMP_LABEL(key, label) \ -do { \ - if (unlikely(*key)) \ - goto label; \ -} while (0) +struct jump_label_key { + atomic_t enabled; +}; -#define jump_label_enable(cond_var) \ -do { \ - *(cond_var) = 1; \ -} while (0) +static __always_inline bool static_branch(struct jump_label_key *key) +{ + if (unlikely(atomic_read(&key->enabled))) + return true; + return false; +} -#define jump_label_disable(cond_var) \ -do { \ - *(cond_var) = 0; \ -} while (0) +static inline void jump_label_inc(struct jump_label_key *key) +{ + atomic_inc(&key->enabled); +} -static inline int jump_label_apply_nops(struct module *mod) +static inline void jump_label_dec(struct jump_label_key *key) { - return 0; + atomic_dec(&key->enabled); } static inline int jump_label_text_reserved(void *start, void *end) @@ -64,16 +87,16 @@ static inline int jump_label_text_reserved(void *start, void *end) static inline void jump_label_lock(void) {} static inline void jump_label_unlock(void) {} -#endif +static inline bool jump_label_enabled(struct jump_label_key *key) +{ + return !!atomic_read(&key->enabled); +} -#define COND_STMT(key, stmt) \ -do { \ - __label__ jl_enabled; \ - JUMP_LABEL(key, jl_enabled); \ - if (0) { \ -jl_enabled: \ - stmt; \ - } \ -} while (0) +static inline int jump_label_apply_nops(struct module *mod) +{ + return 0; +} + +#endif #endif diff --git a/include/linux/jump_label_ref.h b/include/linux/jump_label_ref.h deleted file mode 100644 index e5d012ad92c6..000000000000 --- a/include/linux/jump_label_ref.h +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef _LINUX_JUMP_LABEL_REF_H -#define _LINUX_JUMP_LABEL_REF_H - -#include -#include - -#ifdef HAVE_JUMP_LABEL - -static inline void jump_label_inc(atomic_t *key) -{ - if (atomic_add_return(1, key) == 1) - jump_label_enable(key); -} - -static inline void jump_label_dec(atomic_t *key) -{ - if (atomic_dec_and_test(key)) - jump_label_disable(key); -} - -#else /* !HAVE_JUMP_LABEL */ - -static inline void jump_label_inc(atomic_t *key) -{ - atomic_inc(key); -} - -static inline void jump_label_dec(atomic_t *key) -{ - atomic_dec(key); -} - -#undef JUMP_LABEL -#define JUMP_LABEL(key, label) \ -do { \ - if (unlikely(__builtin_choose_expr( \ - __builtin_types_compatible_p(typeof(key), atomic_t *), \ - atomic_read((atomic_t *)(key)), *(key)))) \ - goto label; \ -} while (0) - -#endif /* HAVE_JUMP_LABEL */ - -#endif /* _LINUX_JUMP_LABEL_REF_H */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 311b4dc785a1..730b7821690f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -505,7 +505,7 @@ struct perf_guest_info_callbacks { #include #include #include -#include +#include #include #include @@ -1034,7 +1034,7 @@ static inline int is_software_event(struct perf_event *event) return event->pmu->task_ctx_nr == perf_sw_context; } -extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; +extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64); @@ -1063,22 +1063,21 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { struct pt_regs hot_regs; - JUMP_LABEL(&perf_swevent_enabled[event_id], have_event); - return; - -have_event: - if (!regs) { - perf_fetch_caller_regs(&hot_regs); - regs = &hot_regs; + if (static_branch(&perf_swevent_enabled[event_id])) { + if (!regs) { + perf_fetch_caller_regs(&hot_regs); + regs = &hot_regs; + } + __perf_sw_event(event_id, nr, nmi, regs, addr); } - __perf_sw_event(event_id, nr, nmi, regs, addr); } -extern atomic_t perf_sched_events; +extern struct jump_label_key perf_sched_events; static inline void perf_event_task_sched_in(struct task_struct *task) { - COND_STMT(&perf_sched_events, __perf_event_task_sched_in(task)); + if (static_branch(&perf_sched_events)) + __perf_event_task_sched_in(task); } static inline @@ -1086,7 +1085,8 @@ void perf_event_task_sched_out(struct task_struct *task, struct task_struct *nex { perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); - COND_STMT(&perf_sched_events, __perf_event_task_sched_out(task, next)); + if (static_branch(&perf_sched_events)) + __perf_event_task_sched_out(task, next); } extern void perf_event_mmap(struct vm_area_struct *vma); diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 97c84a58efb8..d530a4460a0b 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -29,7 +29,7 @@ struct tracepoint_func { struct tracepoint { const char *name; /* Tracepoint name */ - int state; /* State. */ + struct jump_label_key key; void (*regfunc)(void); void (*unregfunc)(void); struct tracepoint_func __rcu *funcs; @@ -146,9 +146,7 @@ void tracepoint_update_probe_range(struct tracepoint * const *begin, extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ - JUMP_LABEL(&__tracepoint_##name.state, do_trace); \ - return; \ -do_trace: \ + if (static_branch(&__tracepoint_##name.key)) \ __DO_TRACE(&__tracepoint_##name, \ TP_PROTO(data_proto), \ TP_ARGS(data_args), \ @@ -176,14 +174,14 @@ do_trace: \ * structures, so we create an array of pointers that will be used for iteration * on the tracepoints. */ -#define DEFINE_TRACE_FN(name, reg, unreg) \ - static const char __tpstrtab_##name[] \ - __attribute__((section("__tracepoints_strings"))) = #name; \ - struct tracepoint __tracepoint_##name \ - __attribute__((section("__tracepoints"))) = \ - { __tpstrtab_##name, 0, reg, unreg, NULL }; \ - static struct tracepoint * const __tracepoint_ptr_##name __used \ - __attribute__((section("__tracepoints_ptrs"))) = \ +#define DEFINE_TRACE_FN(name, reg, unreg) \ + static const char __tpstrtab_##name[] \ + __attribute__((section("__tracepoints_strings"))) = #name; \ + struct tracepoint __tracepoint_##name \ + __attribute__((section("__tracepoints"))) = \ + { __tpstrtab_##name, JUMP_LABEL_INIT, reg, unreg, NULL };\ + static struct tracepoint * const __tracepoint_ptr_##name __used \ + __attribute__((section("__tracepoints_ptrs"))) = \ &__tracepoint_##name; #define DEFINE_TRACE(name) \ -- cgit v1.2.2 From 5cdede2408e80f190c5595e592c24e77c1bf44b2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2011 15:55:18 +0200 Subject: PCI: Move ATS declarations in seperate header file This patch moves the relevant declarations from the local header file in drivers/pci to a more accessible locations so that it can be used by the AMD IOMMU driver too. The file is named pci-ats.h because support for the PCI PRI capability will also be added there in a later patch-set. Signed-off-by: Joerg Roedel Acked-by: Jesse Barnes --- include/linux/pci-ats.h | 52 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 include/linux/pci-ats.h (limited to 'include') diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h new file mode 100644 index 000000000000..655824fa4c76 --- /dev/null +++ b/include/linux/pci-ats.h @@ -0,0 +1,52 @@ +#ifndef LINUX_PCI_ATS_H +#define LINUX_PCI_ATS_H + +/* Address Translation Service */ +struct pci_ats { + int pos; /* capability position */ + int stu; /* Smallest Translation Unit */ + int qdep; /* Invalidate Queue Depth */ + int ref_cnt; /* Physical Function reference count */ + unsigned int is_enabled:1; /* Enable bit is set */ +}; + +#ifdef CONFIG_PCI_IOV + +extern int pci_enable_ats(struct pci_dev *dev, int ps); +extern void pci_disable_ats(struct pci_dev *dev); +extern int pci_ats_queue_depth(struct pci_dev *dev); +/** + * pci_ats_enabled - query the ATS status + * @dev: the PCI device + * + * Returns 1 if ATS capability is enabled, or 0 if not. + */ +static inline int pci_ats_enabled(struct pci_dev *dev) +{ + return dev->ats && dev->ats->is_enabled; +} + +#else /* CONFIG_PCI_IOV */ + +static inline int pci_enable_ats(struct pci_dev *dev, int ps) +{ + return -ENODEV; +} + +static inline void pci_disable_ats(struct pci_dev *dev) +{ +} + +static inline int pci_ats_queue_depth(struct pci_dev *dev) +{ + return -ENODEV; +} + +static inline int pci_ats_enabled(struct pci_dev *dev) +{ + return 0; +} + +#endif /* CONFIG_PCI_IOV */ + +#endif /* LINUX_PCI_ATS_H*/ -- cgit v1.2.2 From dce840a08702bd13a9a186e07e63d1ef82256b5e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Apr 2011 14:09:50 +0200 Subject: sched: Dynamically allocate sched_domain/sched_group data-structures Instead of relying on static allocations for the sched_domain and sched_group trees, dynamically allocate and RCU free them. Allocating this dynamically also allows for some build_sched_groups() simplification since we can now (like with other simplifications) rely on the sched_domain tree instead of hard-coded knowledge. One tricky to note is that detach_destroy_domains() needs to hold rcu_read_lock() over the entire tear-down, per-cpu is not sufficient since that can lead to partial sched_group existance (could possibly be solved by doing the tear-down backwards but this is much more robust). A concequence of the above is that we can no longer print the sched_domain debug stuff from cpu_attach_domain() since that might now run with preemption disabled (due to classic RCU etc.) and sched_domain_debug() does some GFP_KERNEL allocations. Another thing to note is that we now fully rely on normal RCU and not RCU-sched, this is because with the new and exiting RCU flavours we grew over the years BH doesn't necessarily hold off RCU-sched grace periods (-rt is known to break this). This would in fact already cause us grief since we do sched_domain/sched_group iterations from softirq context. This patch is somewhat larger than I would like it to be, but I didn't find any means of shrinking/splitting this. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20110407122942.245307941@chello.nl Signed-off-by: Ingo Molnar --- include/linux/sched.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4ec2c027e92c..020b79d6c486 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -868,6 +868,7 @@ static inline int sd_power_saving_flags(void) struct sched_group { struct sched_group *next; /* Must be a circular list */ + atomic_t ref; /* * CPU power of this group, SCHED_LOAD_SCALE being max power for a @@ -973,6 +974,10 @@ struct sched_domain { #ifdef CONFIG_SCHED_DEBUG char *name; #endif + union { + void *private; /* used during construction */ + struct rcu_head rcu; /* used during destruction */ + }; unsigned int span_weight; /* -- cgit v1.2.2 From 3859173d43658d51a749bc0201b943922577d39c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Apr 2011 14:09:53 +0200 Subject: sched: Reduce some allocation pressure Since we now allocate SD_LV_MAX * nr_cpu_ids sched_domain/sched_group structures when rebuilding the scheduler toplogy it might make sense to shrink that depending on the CONFIG_ options. This is only needed until we get rid of SD_LV_* alltogether and provide a full dynamic topology interface. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20110407122942.406226449@chello.nl Signed-off-by: Ingo Molnar --- include/linux/sched.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 020b79d6c486..5a9168b01db8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -897,12 +897,20 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg) enum sched_domain_level { SD_LV_NONE = 0, +#ifdef CONFIG_SCHED_SMT SD_LV_SIBLING, +#endif +#ifdef CONFIG_SCHED_MC SD_LV_MC, +#endif +#ifdef CONFIG_SCHED_BOOK SD_LV_BOOK, +#endif SD_LV_CPU, +#ifdef CONFIG_NUMA SD_LV_NODE, SD_LV_ALLNODES, +#endif SD_LV_MAX }; -- cgit v1.2.2 From 7dd04b730749f957c116f363524fd622b05e5141 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Apr 2011 14:09:56 +0200 Subject: sched: Remove some dead code Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20110407122942.553814623@chello.nl Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5a9168b01db8..09d9e02f2b61 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -883,9 +883,6 @@ struct sched_group { * NOTE: this field is variable length. (Allocated dynamically * by attaching extra space to the end of the structure, * depending on how many CPUs the kernel has booted up with) - * - * It is also be embedded into static data structures at build - * time. (See 'struct static_sched_group' in kernel/sched.c) */ unsigned long cpumask[0]; }; @@ -994,9 +991,6 @@ struct sched_domain { * NOTE: this field is variable length. (Allocated dynamically * by attaching extra space to the end of the structure, * depending on how many CPUs the kernel has booted up with) - * - * It is also be embedded into static data structures at build - * time. (See 'struct static_sched_domain' in kernel/sched.c) */ unsigned long span[0]; }; -- cgit v1.2.2 From 60495e7760d8ee364695006af37309b0755e0e17 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Apr 2011 14:10:04 +0200 Subject: sched: Dynamic sched_domain::level Remove the SD_LV_ enum and use dynamic level assignments. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20110407122942.969433965@chello.nl Signed-off-by: Ingo Molnar --- include/linux/sched.h | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 09d9e02f2b61..e43e5b0ab0b5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -892,25 +892,6 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg) return to_cpumask(sg->cpumask); } -enum sched_domain_level { - SD_LV_NONE = 0, -#ifdef CONFIG_SCHED_SMT - SD_LV_SIBLING, -#endif -#ifdef CONFIG_SCHED_MC - SD_LV_MC, -#endif -#ifdef CONFIG_SCHED_BOOK - SD_LV_BOOK, -#endif - SD_LV_CPU, -#ifdef CONFIG_NUMA - SD_LV_NODE, - SD_LV_ALLNODES, -#endif - SD_LV_MAX -}; - struct sched_domain_attr { int relax_domain_level; }; @@ -919,6 +900,8 @@ struct sched_domain_attr { .relax_domain_level = -1, \ } +extern int sched_domain_level_max; + struct sched_domain { /* These fields must be setup */ struct sched_domain *parent; /* top domain must be null terminated */ @@ -936,7 +919,7 @@ struct sched_domain { unsigned int forkexec_idx; unsigned int smt_gain; int flags; /* See SD_* */ - enum sched_domain_level level; + int level; /* Runtime fields. */ unsigned long last_balance; /* init to jiffies. units in jiffies */ -- cgit v1.2.2 From 184748cc50b2dceb8287f9fb657eda48ff8fcfe7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Apr 2011 17:23:39 +0200 Subject: sched: Provide scheduler_ipi() callback in response to smp_send_reschedule() For future rework of try_to_wake_up() we'd like to push part of that function onto the CPU the task is actually going to run on. In order to do so we need a generic callback from the existing scheduler IPI. This patch introduces such a generic callback: scheduler_ipi() and implements it as a NOP. BenH notes: PowerPC might use this IPI on offline CPUs under rare conditions! Acked-by: Russell King Acked-by: Martin Schwidefsky Acked-by: Chris Metcalf Acked-by: Jesper Nilsson Acked-by: Benjamin Herrenschmidt Signed-off-by: Ralf Baechle Reviewed-by: Frank Rowand Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110405152728.744338123@chello.nl --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4ec2c027e92c..758e27afcda5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2189,8 +2189,10 @@ extern void set_task_comm(struct task_struct *tsk, char *from); extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP +static inline void scheduler_ipi(void) { } extern unsigned long wait_task_inactive(struct task_struct *, long match_state); #else +static inline void scheduler_ipi(void) { } static inline unsigned long wait_task_inactive(struct task_struct *p, long match_state) { -- cgit v1.2.2 From 3ca7a440da394808571dad32d33d3bc0389982e6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Apr 2011 17:23:40 +0200 Subject: sched: Always provide p->on_cpu Always provide p->on_cpu so that we can determine if its on a cpu without having to lock the rq. Reviewed-by: Frank Rowand Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20110405152728.785452014@chello.nl Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 758e27afcda5..3435837e89ff 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1200,9 +1200,7 @@ struct task_struct { int lock_depth; /* BKL lock depth */ #ifdef CONFIG_SMP -#ifdef __ARCH_WANT_UNLOCKED_CTXSW - int oncpu; -#endif + int on_cpu; #endif int prio, static_prio, normal_prio; -- cgit v1.2.2 From c6eb3dda25892f1f974f5420f63e6721aab02f6f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Apr 2011 17:23:41 +0200 Subject: mutex: Use p->on_cpu for the adaptive spin Since we now have p->on_cpu unconditionally available, use it to re-implement mutex_spin_on_owner. Requested-by: Thomas Gleixner Reviewed-by: Frank Rowand Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110405152728.826338173@chello.nl --- include/linux/mutex.h | 2 +- include/linux/sched.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 94b48bd40dd7..c75471db576e 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -51,7 +51,7 @@ struct mutex { spinlock_t wait_lock; struct list_head wait_list; #if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP) - struct thread_info *owner; + struct task_struct *owner; #endif #ifdef CONFIG_DEBUG_MUTEXES const char *name; diff --git a/include/linux/sched.h b/include/linux/sched.h index 3435837e89ff..173850479e2c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -360,7 +360,7 @@ extern signed long schedule_timeout_interruptible(signed long timeout); extern signed long schedule_timeout_killable(signed long timeout); extern signed long schedule_timeout_uninterruptible(signed long timeout); asmlinkage void schedule(void); -extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner); +extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner); struct nsproxy; struct user_namespace; -- cgit v1.2.2 From fd2f4419b4cbe8fe90796df9617c355762afd6a4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Apr 2011 17:23:44 +0200 Subject: sched: Provide p->on_rq Provide a generic p->on_rq because the p->se.on_rq semantics are unfavourable for lockless wakeups but needed for sched_fair. In particular, p->on_rq is only cleared when we actually dequeue the task in schedule() and not on any random dequeue as done by things like __migrate_task() and __sched_setscheduler(). This also allows us to remove p->se usage from !sched_fair code. Reviewed-by: Frank Rowand Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110405152728.949545047@chello.nl --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 173850479e2c..b33a700652dc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1202,6 +1202,7 @@ struct task_struct { #ifdef CONFIG_SMP int on_cpu; #endif + int on_rq; int prio, static_prio, normal_prio; unsigned int rt_priority; -- cgit v1.2.2 From 7608dec2ce2004c234339bef8c8074e5e601d0e9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Apr 2011 17:23:46 +0200 Subject: sched: Drop the rq argument to sched_class::select_task_rq() In preparation of calling select_task_rq() without rq->lock held, drop the dependency on the rq argument. Reviewed-by: Frank Rowand Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20110405152729.031077745@chello.nl Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index b33a700652dc..ff4e2f9c24a7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1067,8 +1067,7 @@ struct sched_class { void (*put_prev_task) (struct rq *rq, struct task_struct *p); #ifdef CONFIG_SMP - int (*select_task_rq)(struct rq *rq, struct task_struct *p, - int sd_flag, int flags); + int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags); void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); void (*post_schedule) (struct rq *this_rq); -- cgit v1.2.2 From 74f8e4b2335de45485b8d5b31a504747f13c8070 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Apr 2011 17:23:47 +0200 Subject: sched: Remove rq argument to sched_class::task_waking() In preparation of calling this without rq->lock held, remove the dependency on the rq argument. Reviewed-by: Frank Rowand Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20110405152729.071474242@chello.nl Signed-off-by: Ingo Molnar --- include/linux/sched.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index ff4e2f9c24a7..7f5732f8c618 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1048,8 +1048,12 @@ struct sched_domain; #define WF_FORK 0x02 /* child wakeup after fork */ #define ENQUEUE_WAKEUP 1 -#define ENQUEUE_WAKING 2 -#define ENQUEUE_HEAD 4 +#define ENQUEUE_HEAD 2 +#ifdef CONFIG_SMP +#define ENQUEUE_WAKING 4 /* sched_class::task_waking was called */ +#else +#define ENQUEUE_WAKING 0 +#endif #define DEQUEUE_SLEEP 1 @@ -1071,7 +1075,7 @@ struct sched_class { void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); void (*post_schedule) (struct rq *this_rq); - void (*task_waking) (struct rq *this_rq, struct task_struct *task); + void (*task_waking) (struct task_struct *task); void (*task_woken) (struct rq *this_rq, struct task_struct *task); void (*set_cpus_allowed)(struct task_struct *p, -- cgit v1.2.2 From a8e4f2eaecc9bfa4954adf79a04f4f22fddd829c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Apr 2011 17:23:49 +0200 Subject: sched: Delay task_contributes_to_load() In prepratation of having to call task_contributes_to_load() without holding rq->lock, we need to store the result until we do and can update the rq accounting accordingly. Reviewed-by: Frank Rowand Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110405152729.151523907@chello.nl --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 7f5732f8c618..25c50317ddc1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1273,6 +1273,7 @@ struct task_struct { /* Revert to default priority/policy when forking */ unsigned sched_reset_on_fork:1; + unsigned sched_contributes_to_load:1; pid_t pid; pid_t tgid; -- cgit v1.2.2 From 317f394160e9beb97d19a84c39b7e5eb3d7815a8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Apr 2011 17:23:58 +0200 Subject: sched: Move the second half of ttwu() to the remote cpu Now that we've removed the rq->lock requirement from the first part of ttwu() and can compute placement without holding any rq->lock, ensure we execute the second half of ttwu() on the actual cpu we want the task to run on. This avoids having to take rq->lock and doing the task enqueue remotely, saving lots on cacheline transfers. As measured using: http://oss.oracle.com/~mason/sembench.c $ for i in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor ; do echo performance > $i; done $ echo 4096 32000 64 128 > /proc/sys/kernel/sem $ ./sembench -t 2048 -w 1900 -o 0 unpatched: run time 30 seconds 647278 worker burns per second patched: run time 30 seconds 816715 worker burns per second Reviewed-by: Frank Rowand Cc: Mike Galbraith Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110405152729.515897185@chello.nl --- include/linux/sched.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 25c50317ddc1..e09dafa6e149 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1203,6 +1203,7 @@ struct task_struct { int lock_depth; /* BKL lock depth */ #ifdef CONFIG_SMP + struct task_struct *wake_entry; int on_cpu; #endif int on_rq; @@ -2192,7 +2193,7 @@ extern void set_task_comm(struct task_struct *tsk, char *from); extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP -static inline void scheduler_ipi(void) { } +void scheduler_ipi(void); extern unsigned long wait_task_inactive(struct task_struct *, long match_state); #else static inline void scheduler_ipi(void) { } -- cgit v1.2.2 From beafbdc1df02877612dc9039c1de0639921fddec Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Apr 2011 11:17:36 -0400 Subject: xen/irq: Check if the PCI device is owned by a domain different than DOMID_SELF. We check if there is a domain owner for the PCI device. In case of failure (meaning no domain has registered for this device) we make DOMID_SELF the owner. Signed-off-by: Konrad Rzeszutek Wilk [v2: deal with rebasing on v2.6.37-1] [v3: deal with rebasing on stable/irq.cleanup] [v4: deal with rebasing on stable/irq.ween_of_nr_irqs] [v5: deal with rebasing on v2.6.39-rc3] Signed-off-by: Jeremy Fitzhardinge Acked-by: Xiantao Zhang --- include/xen/events.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/xen/events.h b/include/xen/events.h index f1b87ad48ac7..9aecc0b5a0e6 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -85,7 +85,8 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi, int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc); /* Bind an PSI pirq to an irq. */ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, - int pirq, int vector, const char *name); + int pirq, int vector, const char *name, + domid_t domid); #endif /* De-allocates the above mentioned physical interrupt. */ -- cgit v1.2.2 From c7c2c3a28657cfdcef50c02b18ccca3761209e17 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 8 Nov 2010 14:26:36 -0500 Subject: xen/irq: Add support to check if IRQ line is shared with other domains. We do this via the PHYSDEVOP_irq_status_query support hypervisor call. We will get a positive value if another domain has binded its PIRQ to the specified GSI (IRQ line). [v2: Deal with v2.6.37-rc1 rebase fallout] [v3: Deal with stable/irq.cleanup fallout] [v4: xen_ignore_irq->xen_test_irq_shared] Signed-off-by: Konrad Rzeszutek Wilk --- include/xen/events.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/xen/events.h b/include/xen/events.h index 9aecc0b5a0e6..932e54051d3e 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -95,4 +95,7 @@ int xen_destroy_irq(int irq); /* Return irq from pirq */ int xen_irq_from_pirq(unsigned pirq); +/* Determine whether to ignore this IRQ if it is passed to a guest. */ +int xen_test_irq_shared(int irq); + #endif /* _XEN_EVENTS_H */ -- cgit v1.2.2 From e6197acc726ab3baa60375a5891d58c2ee87e0f3 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 24 Feb 2011 14:20:12 -0500 Subject: xen/irq: Export 'xen_pirq_from_irq' function. We need this to find the real Xen PIRQ value for a device that requests an MSI or MSI-X. In the past we used 'xen_gsi_from_irq' since that function would return an Xen PIRQ or GSI depending on the provided IRQ. Now that we have seperated that we need to use the correct function. [v2: Deal with rebase on stable/irq.cleanup] Signed-off-by: Konrad Rzeszutek Wilk --- include/xen/events.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/xen/events.h b/include/xen/events.h index 932e54051d3e..9af21e19545a 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -95,6 +95,9 @@ int xen_destroy_irq(int irq); /* Return irq from pirq */ int xen_irq_from_pirq(unsigned pirq); +/* Return the pirq allocated to the irq. */ +int xen_pirq_from_irq(unsigned irq); + /* Determine whether to ignore this IRQ if it is passed to a guest. */ int xen_test_irq_shared(int irq); -- cgit v1.2.2 From bcdd323b893ad3c9b7ef26b5e4a0bef974238501 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 16 Mar 2011 15:59:35 +0200 Subject: device: add dev_WARN_ONCE it's quite useful to print the device name on the stack dump caused by WARN(), but there are other cases where we might want to use WARN_ONCE. Introduce a helper similar to dev_WARN() for that case too. Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index ab8dfc095709..d4840511e877 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -742,13 +742,17 @@ do { \ #endif /* - * dev_WARN() acts like dev_printk(), but with the key difference + * dev_WARN*() acts like dev_printk(), but with the key difference * of using a WARN/WARN_ON to get the message out, including the * file/line information and a backtrace. */ #define dev_WARN(dev, format, arg...) \ WARN(1, "Device: %s\n" format, dev_driver_string(dev), ## arg); +#define dev_WARN_ONCE(dev, condition, format, arg...) \ + WARN_ONCE(condition, "Device %s\n" format, \ + dev_driver_string(dev), ## arg) + /* Create alias, so I can be autoloaded. */ #define MODULE_ALIAS_CHARDEV(major,minor) \ MODULE_ALIAS("char-major-" __stringify(major) "-" __stringify(minor)) -- cgit v1.2.2 From aed65af1cc2f6fc9ded5a8158f1405a02cf6d2ff Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 28 Mar 2011 09:12:52 -0700 Subject: drivers: make device_type const The device_type structure does not contain data that changes during usage and should be const. This allows devices to declare the struct const. I have patches to change all the subsystems, but need the infra structure change first. Signed-off-by: Stephen Hemminger Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index d4840511e877..350ceda4de97 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -408,7 +408,7 @@ struct device { struct kobject kobj; const char *init_name; /* initial name of the device */ - struct device_type *type; + const struct device_type *type; struct mutex mutex; /* mutex to synchronize calls to * its driver. -- cgit v1.2.2 From 764b0c4b3256ad4431cb52eaf99c0abe6df0a085 Mon Sep 17 00:00:00 2001 From: Pavan Savoy Date: Fri, 8 Apr 2011 04:57:42 -0500 Subject: drivers:misc:ti-st: handle delayed tty receive When certain technologies shutdown their interface without waiting for the acknowledgement from the chip. The receive_buf from the TTY would be invoked a while after the relevant technology is unregistered. This patch introduces a new flag "is_registered" which maintains the state of protocols BT, FM or GPS and thereby removes the need to clear the protocol data from ST when protocols gets unregistered. This fixes corner cases when HCI RESET is sent down from bluetooth stack and the receive_buf is called from tty after 250ms before which bluetooth would have unregistered from the system. OR - when FM application decides to close down the device without sending a power-off FM command resulting in some RDS data or interrupt data coming in after the driver is unregistered. Signed-off-by: Pavan Savoy Signed-off-by: Greg Kroah-Hartman --- include/linux/ti_wilink_st.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ti_wilink_st.h b/include/linux/ti_wilink_st.h index 7071ec5d0118..b004e557caa9 100644 --- a/include/linux/ti_wilink_st.h +++ b/include/linux/ti_wilink_st.h @@ -140,12 +140,12 @@ extern long st_unregister(struct st_proto_s *); */ struct st_data_s { unsigned long st_state; - struct tty_struct *tty; struct sk_buff *tx_skb; #define ST_TX_SENDING 1 #define ST_TX_WAKEUP 2 unsigned long tx_state; struct st_proto_s *list[ST_MAX_CHANNELS]; + bool is_registered[ST_MAX_CHANNELS]; unsigned long rx_state; unsigned long rx_count; struct sk_buff *rx_skb; @@ -155,6 +155,7 @@ struct st_data_s { unsigned char protos_registered; unsigned long ll_state; void *kim_data; + struct tty_struct *tty; }; /* -- cgit v1.2.2 From c8705082404823a5bb3e02a32ba0764399b9e6f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 20 Apr 2011 09:44:46 +0200 Subject: driver core: let dev_set_drvdata return int instead of void as it can fail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before commit b402843 (Driver core: move dev_get/set_drvdata to drivers/base/dd.c) calling dev_set_drvdata with dev=NULL was an unchecked error. After some discussion about what to return in this case removing the check (and so producing a null pointer exception) seems fine. Signed-off-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index 350ceda4de97..2215d013ca96 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -557,7 +557,7 @@ extern int device_move(struct device *dev, struct device *new_parent, extern const char *device_get_devnode(struct device *dev, mode_t *mode, const char **tmp); extern void *dev_get_drvdata(const struct device *dev); -extern void dev_set_drvdata(struct device *dev, void *data); +extern int dev_set_drvdata(struct device *dev, void *data); /* * Root device objects for grouping under /sys/devices -- cgit v1.2.2 From 0911f124bf55357803d53197cc1ae5479f5e37e2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 10 Apr 2011 11:01:51 +0200 Subject: genirq: Forgotten updates/deletions after removal of compat code commit 0c6f8a8b917ad361319c8ace3e9f28e69bfdb4c1 ("genirq: Remove compat code") removed the compat code, but forgot to update some references in comments and delete some of its documentation. Signed-off-by: Geert Uytterhoeven Link: http://lkml.kernel.org/r/%3C1302426113-13808-1-git-send-email-geert%40linux-m68k.org%3E Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 19 +------------------ include/linux/irqdesc.h | 2 +- 2 files changed, 2 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 09a308072f56..a71dd18639fb 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -53,7 +53,7 @@ typedef void (*irq_preflow_handler_t)(struct irq_data *data); * Bits which can be modified via irq_set/clear/modify_status_flags() * IRQ_LEVEL - Interrupt is level type. Will be also * updated in the code when the above trigger - * bits are modified via set_irq_type() + * bits are modified via irq_set_irq_type() * IRQ_PER_CPU - Mark an interrupt PER_CPU. Will protect * it from affinity setting * IRQ_NOPROBE - Interrupt cannot be probed by autoprobing @@ -261,23 +261,6 @@ static inline void irqd_clr_chained_irq_inprogress(struct irq_data *d) * struct irq_chip - hardware interrupt chip descriptor * * @name: name for /proc/interrupts - * @startup: deprecated, replaced by irq_startup - * @shutdown: deprecated, replaced by irq_shutdown - * @enable: deprecated, replaced by irq_enable - * @disable: deprecated, replaced by irq_disable - * @ack: deprecated, replaced by irq_ack - * @mask: deprecated, replaced by irq_mask - * @mask_ack: deprecated, replaced by irq_mask_ack - * @unmask: deprecated, replaced by irq_unmask - * @eoi: deprecated, replaced by irq_eoi - * @end: deprecated, will go away with __do_IRQ() - * @set_affinity: deprecated, replaced by irq_set_affinity - * @retrigger: deprecated, replaced by irq_retrigger - * @set_type: deprecated, replaced by irq_set_type - * @set_wake: deprecated, replaced by irq_wake - * @bus_lock: deprecated, replaced by irq_bus_lock - * @bus_sync_unlock: deprecated, replaced by irq_bus_sync_unlock - * * @irq_startup: start up the interrupt (defaults to ->enable if NULL) * @irq_shutdown: shut down the interrupt (defaults to ->disable if NULL) * @irq_enable: enable the interrupt (defaults to chip->unmask if NULL) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index a082905b5ebe..8e1dc8ea5471 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -21,7 +21,7 @@ struct timer_rand_state; * @status: status information * @core_internal_state__do_not_mess_with_it: core internal status information * @depth: disable-depth, for nested irq_disable() calls - * @wake_depth: enable depth, for multiple set_irq_wake() callers + * @wake_depth: enable depth, for multiple irq_set_irq_wake() callers * @irq_count: stats field to detect stalled irqs * @last_unhandled: aging timer for unhandled count * @irqs_unhandled: stats field for spurious unhandled interrupts -- cgit v1.2.2 From 770767787c23040dc152e7ae230597ff55b39470 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 10 Apr 2011 11:01:52 +0200 Subject: genirq: irq_desc: Document preflow_handler and affinity_hint [ tglx: Filled in the FIXME place holders ] Signed-off-by: Geert Uytterhoeven Link: http://lkml.kernel.org/r/%3C1302426113-13808-2-git-send-email-geert%40linux-m68k.org%3E Signed-off-by: Thomas Gleixner --- include/linux/irqdesc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 8e1dc8ea5471..c70b1aa4b93a 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -16,7 +16,8 @@ struct timer_rand_state; * @irq_data: per irq and chip data passed down to chip functions * @timer_rand_state: pointer to timer rand state struct * @kstat_irqs: irq stats per cpu - * @handle_irq: highlevel irq-events handler [if NULL, __do_IRQ()] + * @handle_irq: highlevel irq-events handler + * @preflow_handler: handler called before the flow handler (currently used by sparc) * @action: the irq action chain * @status: status information * @core_internal_state__do_not_mess_with_it: core internal status information @@ -26,6 +27,7 @@ struct timer_rand_state; * @last_unhandled: aging timer for unhandled count * @irqs_unhandled: stats field for spurious unhandled interrupts * @lock: locking for SMP + * @affinity_hint: hint to user space for preferred irq affinity * @affinity_notify: context for notification of affinity changes * @pending_mask: pending rebalanced interrupts * @threads_oneshot: bitfield to handle shared oneshot threads -- cgit v1.2.2 From 7f1b1244e159a8490d7fb13667c6cb7e1e75046b Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 7 Apr 2011 06:01:44 +0900 Subject: genirq: Support per-IRQ thread disabling. This adds support for disabling threading on a per-IRQ basis via the IRQ status instead of the IRQ flow, which is necessary for interrupts that don't follow the natural IRQ flow channels, such as those that are virtually created. The new APIs added are simply: irq_set_thread() irq_set_nothread() which follow the rest of the IRQ status routines. Chained handlers also have IRQ_NOTHREAD set on them automatically, making the lack of threading explicit rather than implicit. Subsequently, the nothread flag can be viewed through the standard genirq debugging facilities. [ tglx: Fixed cleanup fallout ] Signed-off-by: Paul Mundt Link: http://lkml.kernel.org/r/%3C20110406210135.GF18426%40linux-sh.org%3E Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index a71dd18639fb..39c23786c1db 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -59,6 +59,7 @@ typedef void (*irq_preflow_handler_t)(struct irq_data *data); * IRQ_NOPROBE - Interrupt cannot be probed by autoprobing * IRQ_NOREQUEST - Interrupt cannot be requested via * request_irq() + * IRQ_NOTHREAD - Interrupt cannot be threaded * IRQ_NOAUTOEN - Interrupt is not automatically enabled in * request/setup_irq() * IRQ_NO_BALANCING - Interrupt cannot be balanced (affinity set) @@ -85,6 +86,7 @@ enum { IRQ_NO_BALANCING = (1 << 13), IRQ_MOVE_PCNTXT = (1 << 14), IRQ_NESTED_THREAD = (1 << 15), + IRQ_NOTHREAD = (1 << 16), }; #define IRQF_MODIFY_MASK \ @@ -422,7 +424,7 @@ irq_set_handler(unsigned int irq, irq_flow_handler_t handle) /* * Set a highlevel chained flow handler for a given IRQ. * (a chained handler is automatically enabled and set to - * IRQ_NOREQUEST and IRQ_NOPROBE) + * IRQ_NOREQUEST, IRQ_NOPROBE, and IRQ_NOTHREAD) */ static inline void irq_set_chained_handler(unsigned int irq, irq_flow_handler_t handle) @@ -452,6 +454,16 @@ static inline void irq_set_probe(unsigned int irq) irq_modify_status(irq, IRQ_NOPROBE, 0); } +static inline void irq_set_nothread(unsigned int irq) +{ + irq_modify_status(irq, 0, IRQ_NOTHREAD); +} + +static inline void irq_set_thread(unsigned int irq) +{ + irq_modify_status(irq, IRQ_NOTHREAD, 0); +} + static inline void irq_set_nested_thread(unsigned int irq, bool nest) { if (nest) -- cgit v1.2.2 From 7d8280624797bbe2f5170bd3c85c75a8c9c74242 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 3 Apr 2011 11:42:53 +0200 Subject: genirq: Implement a generic interrupt chip Implement a generic interrupt chip, which is configurable and is able to handle the most common irq chip implementations. Signed-off-by: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org Tested-by: H Hartley Sweeten Tested-by: Tony Lindgren Tested-by; Kevin Hilman --- include/linux/irq.h | 135 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 135 insertions(+) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 39c23786c1db..2ba2f1216790 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -568,6 +568,141 @@ static inline int irq_reserve_irq(unsigned int irq) return irq_reserve_irqs(irq, 1); } +#ifndef irq_reg_writel +# define irq_reg_writel(val, addr) writel(val, addr) +#endif +#ifndef irq_reg_readl +# define irq_reg_readl(addr) readl(addr) +#endif + +/** + * struct irq_chip_regs - register offsets for struct irq_gci + * @enable: Enable register offset to reg_base + * @disable: Disable register offset to reg_base + * @mask: Mask register offset to reg_base + * @ack: Ack register offset to reg_base + * @eoi: Eoi register offset to reg_base + * @type: Type configuration register offset to reg_base + * @polarity: Polarity configuration register offset to reg_base + */ +struct irq_chip_regs { + unsigned long enable; + unsigned long disable; + unsigned long mask; + unsigned long ack; + unsigned long eoi; + unsigned long type; + unsigned long polarity; +}; + +/** + * struct irq_chip_type - Generic interrupt chip instance for a flow type + * @chip: The real interrupt chip which provides the callbacks + * @regs: Register offsets for this chip + * @handler: Flow handler associated with this chip + * @type: Chip can handle these flow types + * + * A irq_generic_chip can have several instances of irq_chip_type when + * it requires different functions and register offsets for different + * flow types. + */ +struct irq_chip_type { + struct irq_chip chip; + struct irq_chip_regs regs; + irq_flow_handler_t handler; + u32 type; +}; + +/** + * struct irq_chip_generic - Generic irq chip data structure + * @lock: Lock to protect register and cache data access + * @reg_base: Register base address (virtual) + * @irq_base: Interrupt base nr for this chip + * @irq_cnt: Number of interrupts handled by this chip + * @mask_cache: Cached mask register + * @type_cache: Cached type register + * @polarity_cache: Cached polarity register + * @wake_enabled: Interrupt can wakeup from suspend + * @wake_active: Interrupt is marked as an wakeup from suspend source + * @num_ct: Number of available irq_chip_type instances (usually 1) + * @private: Private data for non generic chip callbacks + * @chip_types: Array of interrupt irq_chip_types + * + * Note, that irq_chip_generic can have multiple irq_chip_type + * implementations which can be associated to a particular irq line of + * an irq_chip_generic instance. That allows to share and protect + * state in an irq_chip_generic instance when we need to implement + * different flow mechanisms (level/edge) for it. + */ +struct irq_chip_generic { + raw_spinlock_t lock; + void __iomem *reg_base; + unsigned int irq_base; + unsigned int irq_cnt; + u32 mask_cache; + u32 type_cache; + u32 polarity_cache; + u32 wake_enabled; + u32 wake_active; + unsigned int num_ct; + void *private; + struct irq_chip_type chip_types[0]; +}; + +/** + * enum irq_gc_flags - Initialization flags for generic irq chips + * @IRQ_GC_INIT_MASK_CACHE: Initialize the mask_cache by reading mask reg + * @IRQ_GC_INIT_NESTED_LOCK: Set the lock class of the irqs to nested for + * irq chips which need to call irq_set_wake() on + * the parent irq. Usually GPIO implementations + */ +enum irq_gc_flags { + IRQ_GC_INIT_MASK_CACHE = 1 << 0, + IRQ_GC_INIT_NESTED_LOCK = 1 << 1, +}; + +/* Generic chip callback functions */ +void irq_gc_noop(struct irq_data *d); +void irq_gc_mask_disable_reg(struct irq_data *d); +void irq_gc_mask_set_bit(struct irq_data *d); +void irq_gc_mask_clr_bit(struct irq_data *d); +void irq_gc_unmask_enable_reg(struct irq_data *d); +void irq_gc_ack(struct irq_data *d); +void irq_gc_mask_disable_reg_and_ack(struct irq_data *d); +void irq_gc_eoi(struct irq_data *d); +int irq_gc_set_wake(struct irq_data *d, unsigned int on); + +/* Setup functions for irq_chip_generic */ +struct irq_chip_generic * +irq_alloc_generic_chip(const char *name, int nr_ct, unsigned int irq_base, + void __iomem *reg_base, irq_flow_handler_t handler); +void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, + enum irq_gc_flags flags, unsigned int clr, + unsigned int set); +int irq_setup_alt_chip(struct irq_data *d, unsigned int type); + +static inline struct irq_chip_type *irq_data_get_chip_type(struct irq_data *d) +{ + return container_of(d->chip, struct irq_chip_type, chip); +} + +#define IRQ_MSK(n) (u32)((n) < 32 ? ((1 << (n)) - 1) : UINT_MAX) + +#ifdef CONFIG_SMP +static inline void irq_gc_lock(struct irq_chip_generic *gc) +{ + raw_spin_lock(&gc->lock); +} + +static inline void irq_gc_unlock(struct irq_chip_generic *gc) +{ + raw_spin_unlock(&gc->lock); +} +#else +static inline void irq_gc_lock(struct irq_chip_generic *gc) { } +static inline void irq_gc_unlock(struct irq_chip_generic *gc) { } +#endif + #endif /* CONFIG_GENERIC_HARDIRQS */ #endif /* !CONFIG_S390 */ -- cgit v1.2.2 From cfefd21e693dca791bf9ecfc9dd3794facad533c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Apr 2011 22:36:08 +0200 Subject: genirq: Add chip suspend and resume callbacks These callbacks are only called in the syscore suspend/resume code on interrupt chips which have been registered via the generic irq chip mechanism. Calling those callbacks per irq would be rather icky, but with the generic irq chip mechanism we can call this per registered chip. Signed-off-by: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org --- include/linux/irq.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 2ba2f1216790..8b4538446636 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -280,6 +280,9 @@ static inline void irqd_clr_chained_irq_inprogress(struct irq_data *d) * @irq_bus_sync_unlock:function to sync and unlock slow bus (i2c) chips * @irq_cpu_online: configure an interrupt source for a secondary CPU * @irq_cpu_offline: un-configure an interrupt source for a secondary CPU + * @irq_suspend: function called from core code on suspend once per chip + * @irq_resume: function called from core code on resume once per chip + * @irq_pm_shutdown: function called from core code on shutdown once per chip * @irq_print_chip: optional to print special chip info in show_interrupts * @flags: chip specific flags * @@ -309,6 +312,10 @@ struct irq_chip { void (*irq_cpu_online)(struct irq_data *data); void (*irq_cpu_offline)(struct irq_data *data); + void (*irq_suspend)(struct irq_data *data); + void (*irq_resume)(struct irq_data *data); + void (*irq_pm_shutdown)(struct irq_data *data); + void (*irq_print_chip)(struct irq_data *data, struct seq_file *p); unsigned long flags; @@ -626,6 +633,7 @@ struct irq_chip_type { * @wake_active: Interrupt is marked as an wakeup from suspend source * @num_ct: Number of available irq_chip_type instances (usually 1) * @private: Private data for non generic chip callbacks + * @list: List head for keeping track of instances * @chip_types: Array of interrupt irq_chip_types * * Note, that irq_chip_generic can have multiple irq_chip_type @@ -646,6 +654,7 @@ struct irq_chip_generic { u32 wake_active; unsigned int num_ct; void *private; + struct list_head list; struct irq_chip_type chip_types[0]; }; @@ -680,6 +689,8 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, enum irq_gc_flags flags, unsigned int clr, unsigned int set); int irq_setup_alt_chip(struct irq_data *d, unsigned int type); +void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk, + unsigned int clr, unsigned int set); static inline struct irq_chip_type *irq_data_get_chip_type(struct irq_data *d) { -- cgit v1.2.2 From 625f2a378e5a10f45fdc37932fc9f8a21676de9e Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 22 Apr 2011 11:19:10 -0600 Subject: sched: Get rid of lock_depth Neil Brown pointed out that lock_depth somehow escaped the BKL removal work. Let's get rid of it now. Note that the perf scripting utilities still have a bunch of code for dealing with common_lock_depth in tracepoints; I have left that in place in case anybody wants to use that code with older kernels. Suggested-by: Neil Brown Signed-off-by: Jonathan Corbet Cc: Arnd Bergmann Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20110422111910.456c0e84@bike.lwn.net Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 1 - include/linux/sched.h | 6 ------ 2 files changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index caa151fbebb7..689496bb6654 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -134,7 +134,6 @@ extern struct cred init_cred; .stack = &init_thread_info, \ .usage = ATOMIC_INIT(2), \ .flags = PF_KTHREAD, \ - .lock_depth = -1, \ .prio = MAX_PRIO-20, \ .static_prio = MAX_PRIO-20, \ .normal_prio = MAX_PRIO-20, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 171ba24b08a7..013314a56105 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -731,10 +731,6 @@ struct sched_info { /* timestamps */ unsigned long long last_arrival,/* when we last ran on a cpu */ last_queued; /* when we were last queued to run */ -#ifdef CONFIG_SCHEDSTATS - /* BKL stats */ - unsigned int bkl_count; -#endif }; #endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */ @@ -1190,8 +1186,6 @@ struct task_struct { unsigned int flags; /* per process flags, defined below */ unsigned int ptrace; - int lock_depth; /* BKL lock depth */ - #ifdef CONFIG_SMP struct task_struct *wake_entry; int on_cpu; -- cgit v1.2.2 From bf26c018490c2fce7fe9b629083b96ce0e6ad019 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 7 Apr 2011 16:53:20 +0200 Subject: ptrace: Prepare to fix racy accesses on task breakpoints When a task is traced and is in a stopped state, the tracer may execute a ptrace request to examine the tracee state and get its task struct. Right after, the tracee can be killed and thus its breakpoints released. This can happen concurrently when the tracer is in the middle of reading or modifying these breakpoints, leading to dereferencing a freed pointer. Hence, to prepare the fix, create a generic breakpoint reference holding API. When a reference on the breakpoints of a task is held, the breakpoints won't be released until the last reference is dropped. After that, no more ptrace request on the task's breakpoints can be serviced for the tracer. Reported-by: Oleg Nesterov Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Will Deacon Cc: Prasad Cc: Paul Mundt Cc: v2.6.33.. Link: http://lkml.kernel.org/r/1302284067-7860-2-git-send-email-fweisbec@gmail.com --- include/linux/ptrace.h | 13 ++++++++++++- include/linux/sched.h | 3 +++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index a1147e5dd245..9178d5cc0b01 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -189,6 +189,10 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace) child->ptrace = current->ptrace; __ptrace_link(child, current->parent); } + +#ifdef CONFIG_HAVE_HW_BREAKPOINT + atomic_set(&child->ptrace_bp_refcnt, 1); +#endif } /** @@ -350,6 +354,13 @@ extern int task_current_syscall(struct task_struct *target, long *callno, unsigned long args[6], unsigned int maxargs, unsigned long *sp, unsigned long *pc); -#endif +#ifdef CONFIG_HAVE_HW_BREAKPOINT +extern int ptrace_get_breakpoints(struct task_struct *tsk); +extern void ptrace_put_breakpoints(struct task_struct *tsk); +#else +static inline void ptrace_put_breakpoints(struct task_struct *tsk) { } +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + +#endif /* __KERNEL */ #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 18d63cea2848..781abd137673 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1537,6 +1537,9 @@ struct task_struct { unsigned long memsw_nr_pages; /* uncharged mem+swap usage */ } memcg_batch; #endif +#ifdef CONFIG_HAVE_HW_BREAKPOINT + atomic_t ptrace_bp_refcnt; +#endif }; /* Future-safe accessor for struct task_struct's cpus_allowed. */ -- cgit v1.2.2 From ad58671cf32c74a8d6e8f51e63e9cf4e7a73bf1e Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 19 Apr 2011 12:43:45 +0100 Subject: Add a strtobool function matching semantics of existing in kernel equivalents This is a rename of the usr_strtobool proposal, which was a renamed, relocated and fixed version of previous kstrtobool RFC Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- include/linux/string.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/string.h b/include/linux/string.h index a716ee2a8adb..a176db2f2c85 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -123,6 +123,7 @@ extern char **argv_split(gfp_t gfp, const char *str, int *argcp); extern void argv_free(char **argv); extern bool sysfs_streq(const char *s1, const char *s2); +extern int strtobool(const char *s, bool *res); #ifdef CONFIG_BINARY_PRINTF int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); -- cgit v1.2.2 From 94403f8863d0d1d2005291b2ef0719c2534aa303 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 24 Apr 2011 08:18:31 +0200 Subject: perf events: Add stalled cycles generic event - PERF_COUNT_HW_STALLED_CYCLES The new PERF_COUNT_HW_STALLED_CYCLES event tries to approximate cycles the CPU does nothing useful, because it is stalled on a cache-miss or some other condition. Acked-by: Peter Zijlstra Acked-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-fue11vymwqsoo5to72jxxjyl@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ee9f1e782800..ac636dd20a0c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -52,6 +52,7 @@ enum perf_hw_id { PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, PERF_COUNT_HW_BRANCH_MISSES = 5, PERF_COUNT_HW_BUS_CYCLES = 6, + PERF_COUNT_HW_STALLED_CYCLES = 7, PERF_COUNT_HW_MAX, /* non-ABI */ }; -- cgit v1.2.2 From 26fc8775b51484d8c0a671198639c6d5ae60533e Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Fri, 15 Apr 2011 20:08:19 +0200 Subject: mmc: fix a race between card-detect rescan and clock-gate work instances Currently there is a race in the MMC core between a card-detect rescan work and the clock-gating work, scheduled from a command completion. Fix it by removing the dedicated clock-gating mutex and using the MMC standard locking mechanism instead. Signed-off-by: Guennadi Liakhovetski Cc: Simon Horman Cc: Magnus Damm Acked-by: Linus Walleij Cc: Signed-off-by: Chris Ball --- include/linux/mmc/host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index bcb793ec7374..eb792cb6d745 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -183,7 +183,6 @@ struct mmc_host { struct work_struct clk_gate_work; /* delayed clock gate */ unsigned int clk_old; /* old clock value cache */ spinlock_t clk_lock; /* lock for clk fields */ - struct mutex clk_gate_mutex; /* mutex for clock gating */ #endif /* host specific block data */ -- cgit v1.2.2 From acad9853b95df6a3887f52e0ec88e4a77119ee28 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 27 Apr 2011 23:08:51 -0700 Subject: Input: wm831x-ts - allow IRQ flags to be specified This allows maximum flexibility for configuring the direct GPIO based interrupts. Signed-off-by: Mark Brown Signed-off-by: Dmitry Torokhov --- include/linux/mfd/wm831x/pdata.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/wm831x/pdata.h b/include/linux/mfd/wm831x/pdata.h index 173086d42af4..6b0eb130efb8 100644 --- a/include/linux/mfd/wm831x/pdata.h +++ b/include/linux/mfd/wm831x/pdata.h @@ -81,7 +81,9 @@ struct wm831x_touch_pdata { int rpu; /** Pen down sensitivity resistor divider */ int pressure; /** Report pressure (boolean) */ unsigned int data_irq; /** Touch data ready IRQ */ + int data_irqf; /** IRQ flags for data ready IRQ */ unsigned int pd_irq; /** Touch pendown detect IRQ */ + int pd_irqf; /** IRQ flags for pen down IRQ */ }; enum wm831x_watchdog_action { -- cgit v1.2.2 From 68972efa657040f891c7eda07c7da8c8dd576788 Mon Sep 17 00:00:00 2001 From: Paul Stewart Date: Thu, 28 Apr 2011 05:43:37 +0000 Subject: usbnet: Resubmit interrupt URB if device is open Resubmit interrupt URB if device is open. Use a flag set in usbnet_open() to determine this state. Also kill and free interrupt URB in usbnet_disconnect(). [Rebased off git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git] Signed-off-by: Paul Stewart Signed-off-by: David S. Miller --- include/linux/usb/usbnet.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 0e1855079fbb..605b0aa8d852 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -68,6 +68,7 @@ struct usbnet { # define EVENT_RX_PAUSED 5 # define EVENT_DEV_WAKING 6 # define EVENT_DEV_ASLEEP 7 +# define EVENT_DEV_OPEN 8 }; static inline struct usb_driver *driver_of(struct usb_interface *intf) -- cgit v1.2.2 From 5d30b10bd68df007e7ae21e77d1e0ce184b53040 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Apr 2011 15:55:52 -0400 Subject: flex_array: flex_array_prealloc takes a number of elements, not an end Change flex_array_prealloc to take the number of elements for which space should be allocated instead of the last (inclusive) element. Users and documentation are updated accordingly. flex_arrays got introduced before they had users. When folks started using it, they ended up needing a different API than was coded up originally. This swaps over to the API that folks apparently need. Based-on-patch-by: Steffen Klassert Signed-off-by: Eric Paris Tested-by: Chris Richards Acked-by: Dave Hansen Cc: stable@kernel.org [2.6.38+] --- include/linux/flex_array.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h index 70e4efabe0fb..ebeb2f3ad068 100644 --- a/include/linux/flex_array.h +++ b/include/linux/flex_array.h @@ -61,7 +61,7 @@ struct flex_array { struct flex_array *flex_array_alloc(int element_size, unsigned int total, gfp_t flags); int flex_array_prealloc(struct flex_array *fa, unsigned int start, - unsigned int end, gfp_t flags); + unsigned int nr_elements, gfp_t flags); void flex_array_free(struct flex_array *fa); void flex_array_free_parts(struct flex_array *fa); int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, -- cgit v1.2.2 From 69c9dd1ecf446ad8a830e4afc539a2a1adc85b78 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 29 Apr 2011 00:36:05 +0200 Subject: PM: Export platform bus type's default PM callbacks Export the default PM callbacks defined for the platform bus type so that they can be used by power domains for suspending and resuming platform devices in the future. Signed-off-by: Rafael J. Wysocki --- include/linux/platform_device.h | 60 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'include') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 744942c95fec..e0093e061b08 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -205,4 +205,64 @@ static inline char *early_platform_driver_setup_func(void) \ } #endif /* MODULE */ +#ifdef CONFIG_PM_SLEEP +extern int platform_pm_prepare(struct device *dev); +extern void platform_pm_complete(struct device *dev); +#else +#define platform_pm_prepare NULL +#define platform_pm_complete NULL +#endif + +#ifdef CONFIG_SUSPEND +extern int platform_pm_suspend(struct device *dev); +extern int platform_pm_suspend_noirq(struct device *dev); +extern int platform_pm_resume(struct device *dev); +extern int platform_pm_resume_noirq(struct device *dev); +#else +#define platform_pm_suspend NULL +#define platform_pm_resume NULL +#define platform_pm_suspend_noirq NULL +#define platform_pm_resume_noirq NULL +#endif + +#ifdef CONFIG_HIBERNATE_CALLBACKS +extern int platform_pm_freeze(struct device *dev); +extern int platform_pm_freeze_noirq(struct device *dev); +extern int platform_pm_thaw(struct device *dev); +extern int platform_pm_thaw_noirq(struct device *dev); +extern int platform_pm_poweroff(struct device *dev); +extern int platform_pm_poweroff_noirq(struct device *dev); +extern int platform_pm_restore(struct device *dev); +extern int platform_pm_restore_noirq(struct device *dev); +#else +#define platform_pm_freeze NULL +#define platform_pm_thaw NULL +#define platform_pm_poweroff NULL +#define platform_pm_restore NULL +#define platform_pm_freeze_noirq NULL +#define platform_pm_thaw_noirq NULL +#define platform_pm_poweroff_noirq NULL +#define platform_pm_restore_noirq NULL +#endif + +#ifdef CONFIG_PM_SLEEP +#define USE_PLATFORM_PM_SLEEP_OPS \ + .prepare = platform_pm_prepare, \ + .complete = platform_pm_complete, \ + .suspend = platform_pm_suspend, \ + .resume = platform_pm_resume, \ + .freeze = platform_pm_freeze, \ + .thaw = platform_pm_thaw, \ + .poweroff = platform_pm_poweroff, \ + .restore = platform_pm_restore, \ + .suspend_noirq = platform_pm_suspend_noirq, \ + .resume_noirq = platform_pm_resume_noirq, \ + .freeze_noirq = platform_pm_freeze_noirq, \ + .thaw_noirq = platform_pm_thaw_noirq, \ + .poweroff_noirq = platform_pm_poweroff_noirq, \ + .restore_noirq = platform_pm_restore_noirq, +#else +#define USE_PLATFORM_PM_SLEEP_OPS +#endif + #endif /* _PLATFORM_DEVICE_H_ */ -- cgit v1.2.2 From 1d2b71f61b6a10216274e27b717becf9ae101fc7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 29 Apr 2011 00:36:53 +0200 Subject: PM / Runtime: Add subsystem data field to struct dev_pm_info Some subsystems need to attach PM-related data to struct device and they need to use devres for this purpose. For their convenience and to make code more straightforward, add a new field called subsys_data to struct dev_pm_info and let subsystems use it for attaching PM-related information to devices. Convert the ARM shmobile platform to using the new field. Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pm.h b/include/linux/pm.h index 512e09177e57..f4167d0faa67 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -460,6 +460,7 @@ struct dev_pm_info { unsigned long active_jiffies; unsigned long suspended_jiffies; unsigned long accounting_timestamp; + void *subsys_data; /* Owned by the subsystem. */ #endif }; -- cgit v1.2.2 From 8f62242246351b5a4bc0c1f00c0c7003edea128a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 29 Apr 2011 13:19:47 +0200 Subject: perf events: Add generic front-end and back-end stalled cycle event definitions Add two generic hardware events: front-end and back-end stalled cycles. These events measure conditions when the CPU is executing code but its capabilities are not fully utilized. Understanding such situations and analyzing them is an important sub-task of code optimization workflows. Both events limit performance: most front end stalls tend to be caused by branch misprediction or instruction fetch cachemisses, backend stalls can be caused by various resource shortages or inefficient instruction scheduling. Front-end stalls are the more important ones: code cannot run fast if the instruction stream is not being kept up. An over-utilized back-end can cause front-end stalls and thus has to be kept an eye on as well. The exact composition is very program logic and instruction mix dependent. We use the terms 'stall', 'front-end' and 'back-end' loosely and try to use the best available events from specific CPUs that approximate these concepts. Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-7y40wib8n000io7hjpn1dsrm@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ac636dd20a0c..4e2d7ae71499 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -52,7 +52,8 @@ enum perf_hw_id { PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, PERF_COUNT_HW_BRANCH_MISSES = 5, PERF_COUNT_HW_BUS_CYCLES = 6, - PERF_COUNT_HW_STALLED_CYCLES = 7, + PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7, + PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8, PERF_COUNT_HW_MAX, /* non-ABI */ }; -- cgit v1.2.2 From 85eb8c8d0b0900c073b0e6f89979ac9c439ade1a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 30 Apr 2011 00:25:44 +0200 Subject: PM / Runtime: Generic clock manipulation rountines for runtime PM (v6) Many different platforms and subsystems may want to disable device clocks during suspend and enable them during resume which is going to be done in a very similar way in all those cases. For this reason, provide generic routines for the manipulation of device clocks during suspend and resume. Convert the ARM shmobile platform to using the new routines. Signed-off-by: Rafael J. Wysocki --- include/linux/pm_runtime.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 8de9aa6e7def..878cf84baeb1 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -245,4 +245,46 @@ static inline void pm_runtime_dont_use_autosuspend(struct device *dev) __pm_runtime_use_autosuspend(dev, false); } +struct pm_clk_notifier_block { + struct notifier_block nb; + struct dev_power_domain *pwr_domain; + char *con_ids[]; +}; + +#ifdef CONFIG_PM_RUNTIME_CLK +extern int pm_runtime_clk_init(struct device *dev); +extern void pm_runtime_clk_destroy(struct device *dev); +extern int pm_runtime_clk_add(struct device *dev, const char *con_id); +extern void pm_runtime_clk_remove(struct device *dev, const char *con_id); +extern int pm_runtime_clk_suspend(struct device *dev); +extern int pm_runtime_clk_resume(struct device *dev); +#else +static inline int pm_runtime_clk_init(struct device *dev) +{ + return -EINVAL; +} +static inline void pm_runtime_clk_destroy(struct device *dev) +{ +} +static inline int pm_runtime_clk_add(struct device *dev, const char *con_id) +{ + return -EINVAL; +} +static inline void pm_runtime_clk_remove(struct device *dev, const char *con_id) +{ +} +#define pm_runtime_clock_suspend NULL +#define pm_runtime_clock_resume NULL +#endif + +#ifdef CONFIG_HAVE_CLK +extern void pm_runtime_clk_add_notifier(struct bus_type *bus, + struct pm_clk_notifier_block *clknb); +#else +static inline void pm_runtime_clk_add_notifier(struct bus_type *bus, + struct pm_clk_notifier_block *clknb) +{ +} +#endif + #endif -- cgit v1.2.2 From 45a4a2372b364107cabea79f255b333236626416 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 21 Apr 2011 23:16:46 -0400 Subject: ftrace: Remove FTRACE_FL_FAILED flag Since we disable all function tracer processing if we detect that a modification of a instruction had failed, we do not need to track that the record has failed. No more ftrace processing is allowed, and the FTRACE_FL_FAILED flag is pointless. Removing this flag simplifies some of the code, but some ftrace_disabled checks needed to be added or move around a little. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ca29e03c1fac..2a195ffd4269 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -147,11 +147,10 @@ extern int ftrace_text_reserved(void *start, void *end); enum { FTRACE_FL_FREE = (1 << 0), - FTRACE_FL_FAILED = (1 << 1), - FTRACE_FL_FILTER = (1 << 2), - FTRACE_FL_ENABLED = (1 << 3), - FTRACE_FL_NOTRACE = (1 << 4), - FTRACE_FL_CONVERTED = (1 << 5), + FTRACE_FL_FILTER = (1 << 1), + FTRACE_FL_ENABLED = (1 << 2), + FTRACE_FL_NOTRACE = (1 << 3), + FTRACE_FL_CONVERTED = (1 << 4), }; struct dyn_ftrace { -- cgit v1.2.2 From d2c8c3eafbf715306ec891e7ca52d3d999acbe31 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 25 Apr 2011 14:32:42 -0400 Subject: ftrace: Remove FTRACE_FL_CONVERTED flag Since we disable all function tracer processing if we detect that a modification of a instruction had failed, we do not need to track that the record has failed. No more ftrace processing is allowed, and the FTRACE_FL_CONVERTED flag is pointless. The FTRACE_FL_CONVERTED flag was used to denote records that were successfully converted from mcount calls into nops. But if a single record fails, all of ftrace is disabled. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2a195ffd4269..32047449b309 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -150,7 +150,6 @@ enum { FTRACE_FL_FILTER = (1 << 1), FTRACE_FL_ENABLED = (1 << 2), FTRACE_FL_NOTRACE = (1 << 3), - FTRACE_FL_CONVERTED = (1 << 4), }; struct dyn_ftrace { -- cgit v1.2.2 From e2c85d8e3974c9041ad7b080846b28d2243e771b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 3 May 2011 15:15:55 -0400 Subject: drm/radeon/kms: add some new pci ids Signed-off-by: Alex Deucher Cc: stable@kernel.org Signed-off-by: Dave Airlie --- include/drm/drm_pciids.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 816e30cbd968..f04b2a3b0f49 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -155,6 +155,7 @@ {0x1002, 0x6719, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAYMAN|RADEON_NEW_MEMMAP}, \ {0x1002, 0x671c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAYMAN|RADEON_NEW_MEMMAP}, \ {0x1002, 0x671d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAYMAN|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x671f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAYMAN|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6720, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6721, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6722, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_NEW_MEMMAP}, \ @@ -167,6 +168,7 @@ {0x1002, 0x6729, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6738, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6739, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x673e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6740, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6741, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6742, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ @@ -199,6 +201,7 @@ {0x1002, 0x688D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6898, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6899, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x689b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x689c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HEMLOCK|RADEON_NEW_MEMMAP}, \ {0x1002, 0x689d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HEMLOCK|RADEON_NEW_MEMMAP}, \ {0x1002, 0x689e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \ @@ -209,7 +212,9 @@ {0x1002, 0x68b0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x68b8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_NEW_MEMMAP}, \ {0x1002, 0x68b9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x68ba, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_NEW_MEMMAP}, \ {0x1002, 0x68be, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x68bf, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_NEW_MEMMAP}, \ {0x1002, 0x68c0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x68c1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x68c7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ -- cgit v1.2.2 From 8aeb96f80232e9a701b5c4715504f4c9173978bd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 3 May 2011 19:28:02 -0400 Subject: drm/radeon/kms: fix gart setup on fusion parts (v2) Out of the entire GART/VM subsystem, the hw designers changed the location of 3 regs. v2: airlied: add parameter for userspace to work from. Signed-off-by: Alex Deucher Signed-off-by: Jerome Glisse Cc: stable@kernel.org Signed-off-by: Dave Airlie --- include/drm/radeon_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h index 7aa5dddb2098..787f7b6fd622 100644 --- a/include/drm/radeon_drm.h +++ b/include/drm/radeon_drm.h @@ -910,6 +910,7 @@ struct drm_radeon_cs { #define RADEON_INFO_CLOCK_CRYSTAL_FREQ 0x09 /* clock crystal frequency */ #define RADEON_INFO_NUM_BACKENDS 0x0a /* DB/backends for r600+ - need for OQ */ #define RADEON_INFO_NUM_TILE_PIPES 0x0b /* tile pipes for r600+ */ +#define RADEON_INFO_FUSION_GART_WORKING 0x0c /* fusion writes to GTT were broken before this */ struct drm_radeon_info { uint32_t request; -- cgit v1.2.2 From e7e7ee2eab2080248084d71fe0a115ab745eb2aa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 4 May 2011 08:42:29 +0200 Subject: perf events: Clean up definitions and initializers, update copyrights Fix a few inconsistent style bits that were added over the past few months. Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-yv4hwf9yhnzoada8pcpb3a97@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 96 +++++++++++++++++++++------------------------- 1 file changed, 44 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9eec53d97370..207c16976a17 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -2,8 +2,8 @@ * Performance events: * * Copyright (C) 2008-2009, Thomas Gleixner - * Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar - * Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar + * Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra * * Data type definitions, declarations, prototypes. * @@ -468,9 +468,9 @@ enum perf_callchain_context { PERF_CONTEXT_MAX = (__u64)-4095, }; -#define PERF_FLAG_FD_NO_GROUP (1U << 0) -#define PERF_FLAG_FD_OUTPUT (1U << 1) -#define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */ +#define PERF_FLAG_FD_NO_GROUP (1U << 0) +#define PERF_FLAG_FD_OUTPUT (1U << 1) +#define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */ #ifdef __KERNEL__ /* @@ -484,9 +484,9 @@ enum perf_callchain_context { #endif struct perf_guest_info_callbacks { - int (*is_in_guest) (void); - int (*is_user_mode) (void); - unsigned long (*get_guest_ip) (void); + int (*is_in_guest)(void); + int (*is_user_mode)(void); + unsigned long (*get_guest_ip)(void); }; #ifdef CONFIG_HAVE_HW_BREAKPOINT @@ -652,19 +652,19 @@ struct pmu { * Start the transaction, after this ->add() doesn't need to * do schedulability tests. */ - void (*start_txn) (struct pmu *pmu); /* optional */ + void (*start_txn) (struct pmu *pmu); /* optional */ /* * If ->start_txn() disabled the ->add() schedulability test * then ->commit_txn() is required to perform one. On success * the transaction is closed. On error the transaction is kept * open until ->cancel_txn() is called. */ - int (*commit_txn) (struct pmu *pmu); /* optional */ + int (*commit_txn) (struct pmu *pmu); /* optional */ /* * Will cancel the transaction, assumes ->del() is called * for each successful ->add() during the transaction. */ - void (*cancel_txn) (struct pmu *pmu); /* optional */ + void (*cancel_txn) (struct pmu *pmu); /* optional */ }; /** @@ -712,15 +712,15 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *, int, struct pt_regs *regs); enum perf_group_flag { - PERF_GROUP_SOFTWARE = 0x1, + PERF_GROUP_SOFTWARE = 0x1, }; -#define SWEVENT_HLIST_BITS 8 -#define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS) +#define SWEVENT_HLIST_BITS 8 +#define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS) struct swevent_hlist { - struct hlist_head heads[SWEVENT_HLIST_SIZE]; - struct rcu_head rcu_head; + struct hlist_head heads[SWEVENT_HLIST_SIZE]; + struct rcu_head rcu_head; }; #define PERF_ATTACH_CONTEXT 0x01 @@ -733,13 +733,13 @@ struct swevent_hlist { * This is a per-cpu dynamically allocated data structure. */ struct perf_cgroup_info { - u64 time; - u64 timestamp; + u64 time; + u64 timestamp; }; struct perf_cgroup { - struct cgroup_subsys_state css; - struct perf_cgroup_info *info; /* timing info, one per cpu */ + struct cgroup_subsys_state css; + struct perf_cgroup_info *info; /* timing info, one per cpu */ }; #endif @@ -923,7 +923,7 @@ struct perf_event_context { /* * Number of contexts where an event can trigger: - * task, softirq, hardirq, nmi. + * task, softirq, hardirq, nmi. */ #define PERF_NR_CONTEXTS 4 @@ -1001,8 +1001,7 @@ struct perf_sample_data { struct perf_raw_record *raw; }; -static inline -void perf_sample_data_init(struct perf_sample_data *data, u64 addr) +static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr) { data->addr = addr; data->raw = NULL; @@ -1039,8 +1038,7 @@ extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64); #ifndef perf_arch_fetch_caller_regs -static inline void -perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { } +static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { } #endif /* @@ -1080,8 +1078,7 @@ static inline void perf_event_task_sched_in(struct task_struct *task) __perf_event_task_sched_in(task); } -static inline -void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) +static inline void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) { perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); @@ -1099,14 +1096,10 @@ extern void perf_event_fork(struct task_struct *tsk); /* Callchains */ DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); -extern void perf_callchain_user(struct perf_callchain_entry *entry, - struct pt_regs *regs); -extern void perf_callchain_kernel(struct perf_callchain_entry *entry, - struct pt_regs *regs); - +extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs); +extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs); -static inline void -perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) +static inline void perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) { if (entry->nr < PERF_MAX_STACK_DEPTH) entry->ip[entry->nr++] = ip; @@ -1142,9 +1135,9 @@ extern void perf_tp_event(u64 addr, u64 count, void *record, extern void perf_bp_event(struct perf_event *event, void *data); #ifndef perf_misc_flags -#define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \ - PERF_RECORD_MISC_KERNEL) -#define perf_instruction_pointer(regs) instruction_pointer(regs) +# define perf_misc_flags(regs) \ + (user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL) +# define perf_instruction_pointer(regs) instruction_pointer(regs) #endif extern int perf_output_begin(struct perf_output_handle *handle, @@ -1179,9 +1172,9 @@ static inline void perf_bp_event(struct perf_event *event, void *data) { } static inline int perf_register_guest_info_callbacks -(struct perf_guest_info_callbacks *callbacks) { return 0; } +(struct perf_guest_info_callbacks *callbacks) { return 0; } static inline int perf_unregister_guest_info_callbacks -(struct perf_guest_info_callbacks *callbacks) { return 0; } +(struct perf_guest_info_callbacks *callbacks) { return 0; } static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_comm(struct task_struct *tsk) { } @@ -1194,23 +1187,22 @@ static inline void perf_event_disable(struct perf_event *event) { } static inline void perf_event_task_tick(void) { } #endif -#define perf_output_put(handle, x) \ - perf_output_copy((handle), &(x), sizeof(x)) +#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x)) /* * This has to have a higher priority than migration_notifier in sched.c. */ -#define perf_cpu_notifier(fn) \ -do { \ - static struct notifier_block fn##_nb __cpuinitdata = \ - { .notifier_call = fn, .priority = CPU_PRI_PERF }; \ - fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE, \ - (void *)(unsigned long)smp_processor_id()); \ - fn(&fn##_nb, (unsigned long)CPU_STARTING, \ - (void *)(unsigned long)smp_processor_id()); \ - fn(&fn##_nb, (unsigned long)CPU_ONLINE, \ - (void *)(unsigned long)smp_processor_id()); \ - register_cpu_notifier(&fn##_nb); \ +#define perf_cpu_notifier(fn) \ +do { \ + static struct notifier_block fn##_nb __cpuinitdata = \ + { .notifier_call = fn, .priority = CPU_PRI_PERF }; \ + fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE, \ + (void *)(unsigned long)smp_processor_id()); \ + fn(&fn##_nb, (unsigned long)CPU_STARTING, \ + (void *)(unsigned long)smp_processor_id()); \ + fn(&fn##_nb, (unsigned long)CPU_ONLINE, \ + (void *)(unsigned long)smp_processor_id()); \ + register_cpu_notifier(&fn##_nb); \ } while (0) #endif /* __KERNEL__ */ -- cgit v1.2.2 From 2d06d8c49afdcc9bb35a85039fa50f0fe35bd40e Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 27 Mar 2011 15:04:46 +0200 Subject: [CPUFREQ] use dynamic debug instead of custom infrastructure With dynamic debug having gained the capability to report debug messages also during the boot process, it offers a far superior interface for debug messages than the custom cpufreq infrastructure. As a first step, remove the old cpufreq_debug_printk() function and replace it with a call to the generic pr_debug() function. How can dynamic debug be used on cpufreq? You need a kernel which has CONFIG_DYNAMIC_DEBUG enabled. To enabled debugging during runtime, mount debugfs and $ echo -n 'module cpufreq +p' > /sys/kernel/debug/dynamic_debug/control for debugging the complete "cpufreq" module. To achieve the same goal during boot, append ddebug_query="module cpufreq +p" as a boot parameter to the kernel of your choice. For more detailled instructions, please see Documentation/dynamic-debug-howto.txt Signed-off-by: Dominik Brodowski Signed-off-by: Dave Jones --- include/linux/cpufreq.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 9343dd3de858..2845f6e67221 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -397,23 +397,4 @@ void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table, void cpufreq_frequency_table_put_attr(unsigned int cpu); -/********************************************************************* - * UNIFIED DEBUG HELPERS * - *********************************************************************/ - -#define CPUFREQ_DEBUG_CORE 1 -#define CPUFREQ_DEBUG_DRIVER 2 -#define CPUFREQ_DEBUG_GOVERNOR 4 - -#ifdef CONFIG_CPU_FREQ_DEBUG - -extern void cpufreq_debug_printk(unsigned int type, const char *prefix, - const char *fmt, ...); - -#else - -#define cpufreq_debug_printk(msg...) do { } while(0) - -#endif /* CONFIG_CPU_FREQ_DEBUG */ - #endif /* _LINUX_CPUFREQ_H */ -- cgit v1.2.2 From 335dc3335ff692173bc766b248f7a97f3a23b30a Mon Sep 17 00:00:00 2001 From: Thiago Farina Date: Thu, 28 Apr 2011 20:42:53 -0300 Subject: [CPUFREQ] cpufreq.h: Fix some checkpatch.pl coding style issues. Before: $ scripts/checkpatch.pl --file --terse include/linux/cpufreq.h total: 14 errors, 11 warnings, 419 lines checked After: $ scripts/checkpatch.pl --file --terse include/linux/cpufreq.h total: 2 errors, 4 warnings, 422 lines checked Signed-off-by: Thiago Farina Signed-off-by: Dave Jones --- include/linux/cpufreq.h | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 2845f6e67221..11be48e0d168 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -3,7 +3,7 @@ * * Copyright (C) 2001 Russell King * (C) 2002 - 2003 Dominik Brodowski - * + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. @@ -56,9 +56,9 @@ static inline int cpufreq_unregister_notifier(struct notifier_block *nb, #define CPUFREQ_POLICY_POWERSAVE (1) #define CPUFREQ_POLICY_PERFORMANCE (2) -/* Frequency values here are CPU kHz so that hardware which doesn't run - * with some frequencies can complain without having to guess what per - * cent / per mille means. +/* Frequency values here are CPU kHz so that hardware which doesn't run + * with some frequencies can complain without having to guess what per + * cent / per mille means. * Maximum transition latency is in nanoseconds - if it's unknown, * CPUFREQ_ETERNAL shall be used. */ @@ -72,13 +72,15 @@ extern struct kobject *cpufreq_global_kobject; struct cpufreq_cpuinfo { unsigned int max_freq; unsigned int min_freq; - unsigned int transition_latency; /* in 10^(-9) s = nanoseconds */ + + /* in 10^(-9) s = nanoseconds */ + unsigned int transition_latency; }; struct cpufreq_real_policy { unsigned int min; /* in kHz */ unsigned int max; /* in kHz */ - unsigned int policy; /* see above */ + unsigned int policy; /* see above */ struct cpufreq_governor *governor; /* see below */ }; @@ -94,7 +96,7 @@ struct cpufreq_policy { unsigned int max; /* in kHz */ unsigned int cur; /* in kHz, only needed if cpufreq * governors are used */ - unsigned int policy; /* see above */ + unsigned int policy; /* see above */ struct cpufreq_governor *governor; /* see below */ struct work_struct update; /* if update_policy() needs to be @@ -167,11 +169,11 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, u_int mu struct cpufreq_governor { char name[CPUFREQ_NAME_LEN]; - int (*governor) (struct cpufreq_policy *policy, + int (*governor) (struct cpufreq_policy *policy, unsigned int event); ssize_t (*show_setspeed) (struct cpufreq_policy *policy, char *buf); - int (*store_setspeed) (struct cpufreq_policy *policy, + int (*store_setspeed) (struct cpufreq_policy *policy, unsigned int freq); unsigned int max_transition_latency; /* HW must be able to switch to next freq faster than this value in nano secs or we @@ -180,7 +182,8 @@ struct cpufreq_governor { struct module *owner; }; -/* pass a target to the cpufreq driver +/* + * Pass a target to the cpufreq driver. */ extern int cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int target_freq, @@ -237,9 +240,9 @@ struct cpufreq_driver { /* flags */ -#define CPUFREQ_STICKY 0x01 /* the driver isn't removed even if +#define CPUFREQ_STICKY 0x01 /* the driver isn't removed even if * all ->init() calls failed */ -#define CPUFREQ_CONST_LOOPS 0x02 /* loops_per_jiffy or other kernel +#define CPUFREQ_CONST_LOOPS 0x02 /* loops_per_jiffy or other kernel * "constants" aren't affected by * frequency transitions */ #define CPUFREQ_PM_NO_WARN 0x04 /* don't warn on suspend/resume speed @@ -252,7 +255,7 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state); -static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy, unsigned int min, unsigned int max) +static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy, unsigned int min, unsigned int max) { if (policy->min < min) policy->min = min; @@ -386,12 +389,12 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, /* the following 3 funtions are for cpufreq core use only */ struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu); struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu); -void cpufreq_cpu_put (struct cpufreq_policy *data); +void cpufreq_cpu_put(struct cpufreq_policy *data); /* the following are really really optional */ extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs; -void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table, +void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table, unsigned int cpu); void cpufreq_frequency_table_put_attr(unsigned int cpu); -- cgit v1.2.2 From 30106b8ce2cc2243514116d6f29086e6deecc754 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 4 May 2011 15:38:19 +0200 Subject: slub: Fix the lockless code on 32-bit platforms with no 64-bit cmpxchg The SLUB allocator use of the cmpxchg_double logic was wrong: it actually needs the irq-safe one. That happens automatically when we use the native unlocked 'cmpxchg8b' instruction, but when compiling the kernel for older x86 CPUs that do not support that instruction, we fall back to the generic emulation code. And if you don't specify that you want the irq-safe version, the generic code ends up just open-coding the cmpxchg8b equivalent without any protection against interrupts or preemption. Which definitely doesn't work for SLUB. This was reported by Werner Landgraf , who saw instability with his distro-kernel that was compiled to support pretty much everything under the sun. Most big Linux distributions tend to compile for PPro and later, and would never have noticed this problem. This also fixes the prototypes for the irqsafe cmpxchg_double functions to use 'bool' like they should. [ Btw, that whole "generic code defaults to no protection" design just sounds stupid - if the code needs no protection, there is no reason to use "cmpxchg_double" to begin with. So we should probably just remove the unprotected version entirely as pointless. - Linus ] Signed-off-by: Thomas Gleixner Reported-and-tested-by: werner Acked-and-tested-by: Ingo Molnar Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: Jens Axboe Cc: Tejun Heo Link: http://lkml.kernel.org/r/alpine.LFD.2.02.1105041539050.3005@ionos Signed-off-by: Ingo Molnar Signed-off-by: Linus Torvalds --- include/linux/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 3a5c4449fd36..8b97308e65df 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -948,7 +948,7 @@ do { \ irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) # endif # define irqsafe_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - __pcpu_double_call_return_int(irqsafe_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) + __pcpu_double_call_return_bool(irqsafe_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) #endif #endif /* __LINUX_PERCPU_H */ -- cgit v1.2.2 From a26ac2455ffcf3be5c6ef92bc6df7182700f2114 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 12 Jan 2011 14:10:23 -0800 Subject: rcu: move TREE_RCU from softirq to kthread If RCU priority boosting is to be meaningful, callback invocation must be boosted in addition to preempted RCU readers. Otherwise, in presence of CPU real-time threads, the grace period ends, but the callbacks don't get invoked. If the callbacks don't get invoked, the associated memory doesn't get freed, so the system is still subject to OOM. But it is not reasonable to priority-boost RCU_SOFTIRQ, so this commit moves the callback invocations to a kthread, which can be boosted easily. Also add comments and properly synchronized all accesses to rcu_cpu_kthread_task, as suggested by Lai Jiangshan. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/interrupt.h | 1 - include/trace/events/irq.h | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index bea0ac750712..6c12989839d9 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -414,7 +414,6 @@ enum TASKLET_SOFTIRQ, SCHED_SOFTIRQ, HRTIMER_SOFTIRQ, - RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ NR_SOFTIRQS }; diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index 1c09820df585..ae045ca7d356 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -20,8 +20,7 @@ struct softirq_action; softirq_name(BLOCK_IOPOLL), \ softirq_name(TASKLET), \ softirq_name(SCHED), \ - softirq_name(HRTIMER), \ - softirq_name(RCU)) + softirq_name(HRTIMER)) /** * irq_handler_entry - called immediately before the irq action handler -- cgit v1.2.2 From 4a29865689dbb87a02e3b0fff4a4ae5041273173 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 3 Apr 2011 21:33:51 -0700 Subject: rcu: make rcutorture version numbers available through debugfs It is not possible to accurately correlate rcutorture output with that of debugfs. This patch therefore adds a debugfs file that prints out the rcutorture version number, permitting easy correlation. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 13 ++++++++++++- include/linux/rcutree.h | 3 +++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index ff422d2b7f90..9e169c2ba91f 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -47,6 +47,18 @@ extern int rcutorture_runnable; /* for sysctl */ #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ +#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) +extern void rcutorture_record_test_transition(void); +extern void rcutorture_record_progress(unsigned long vernum); +#else +static inline void rcutorture_record_test_transition(void) +{ +} +static inline void rcutorture_record_progress(unsigned long vernum) +{ +} +#endif + #define UINT_CMP_GE(a, b) (UINT_MAX / 2 >= (a) - (b)) #define UINT_CMP_LT(a, b) (UINT_MAX / 2 < (a) - (b)) #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) @@ -68,7 +80,6 @@ extern void call_rcu_sched(struct rcu_head *head, extern void synchronize_sched(void); extern void rcu_barrier_bh(void); extern void rcu_barrier_sched(void); -extern int sched_expedited_torture_stats(char *page); static inline void __rcu_read_lock_bh(void) { diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 3a933482734a..284dad10c55b 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -58,9 +58,12 @@ static inline void synchronize_rcu_bh_expedited(void) extern void rcu_barrier(void); +extern unsigned long rcutorture_testseq; +extern unsigned long rcutorture_vernum; extern long rcu_batches_completed(void); extern long rcu_batches_completed_bh(void); extern long rcu_batches_completed_sched(void); + extern void rcu_force_quiescent_state(void); extern void rcu_bh_force_quiescent_state(void); extern void rcu_sched_force_quiescent_state(void); -- cgit v1.2.2 From b0c9d7ff2793502650ad987c3f237d5fe5587a1e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 29 Mar 2011 12:56:56 -0700 Subject: rcu: add DEBUG_OBJECTS_RCU_HEAD check for alignment Verify that rcu_head structures are aligned to a four-byte boundary. This check is enabled by CONFIG_DEBUG_OBJECTS_RCU_HEAD. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9e169c2ba91f..c7aeacf7fc98 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -785,6 +785,7 @@ extern struct debug_obj_descr rcuhead_debug_descr; static inline void debug_rcu_head_queue(struct rcu_head *head) { + WARN_ON_ONCE((unsigned long)head & 0x3); debug_object_activate(head, &rcuhead_debug_descr); debug_object_active_state(head, &rcuhead_debug_descr, STATE_RCU_HEAD_READY, -- cgit v1.2.2 From 9ab1544eb4196ca8d05c433b2eb56f74496b1ee3 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 18 Mar 2011 11:15:47 +0800 Subject: rcu: introduce kfree_rcu() Many rcu callbacks functions just call kfree() on the base structure. These functions are trivial, but their size adds up, and furthermore when they are used in a kernel module, that module must invoke the high-latency rcu_barrier() function at module-unload time. The kfree_rcu() function introduced by this commit addresses this issue. Rather than encoding a function address in the embedded rcu_head structure, kfree_rcu() instead encodes the offset of the rcu_head structure within the base structure. Because the functions are not allowed in the low-order 4096 bytes of kernel virtual memory, offsets up to 4095 bytes can be accommodated. If the offset is larger than 4095 bytes, a compile-time error will be generated in __kfree_rcu(). If this error is triggered, you can either fall back to use of call_rcu() or rearrange the structure to position the rcu_head structure into the first 4096 bytes. Note that the allowable offset might decrease in the future, for example, to allow something like kmem_cache_free_rcu(). The new kfree_rcu() function can replace code as follows: call_rcu(&p->rcu, simple_kfree_callback); where "simple_kfree_callback()" might be defined as follows: void simple_kfree_callback(struct rcu_head *p) { struct foo *q = container_of(p, struct foo, rcu); kfree(q); } with the following: kfree_rcu(&p->rcu, rcu); Note that the "rcu" is the name of a field in the structure being freed. The reason for using this rather than passing in a pointer to the base structure is that the above approach allows better type checking. This commit is based on earlier work by Lai Jiangshan and Manfred Spraul: Lai's V1 patch: http://lkml.org/lkml/2008/9/18/1 Manfred's patch: http://lkml.org/lkml/2009/1/2/115 Signed-off-by: Lai Jiangshan Signed-off-by: Manfred Spraul Signed-off-by: Paul E. McKenney Reviewed-by: David Howells Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index c7aeacf7fc98..99f9aa7c2804 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -809,4 +809,60 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head) } #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ +static __always_inline bool __is_kfree_rcu_offset(unsigned long offset) +{ + return offset < 4096; +} + +static __always_inline +void __kfree_rcu(struct rcu_head *head, unsigned long offset) +{ + typedef void (*rcu_callback)(struct rcu_head *); + + BUILD_BUG_ON(!__builtin_constant_p(offset)); + + /* See the kfree_rcu() header comment. */ + BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); + + call_rcu(head, (rcu_callback)offset); +} + +extern void kfree(const void *); + +static inline void __rcu_reclaim(struct rcu_head *head) +{ + unsigned long offset = (unsigned long)head->func; + + if (__is_kfree_rcu_offset(offset)) + kfree((void *)head - offset); + else + head->func(head); +} + +/** + * kfree_rcu() - kfree an object after a grace period. + * @ptr: pointer to kfree + * @rcu_head: the name of the struct rcu_head within the type of @ptr. + * + * Many rcu callbacks functions just call kfree() on the base structure. + * These functions are trivial, but their size adds up, and furthermore + * when they are used in a kernel module, that module must invoke the + * high-latency rcu_barrier() function at module-unload time. + * + * The kfree_rcu() function handles this issue. Rather than encoding a + * function address in the embedded rcu_head structure, kfree_rcu() instead + * encodes the offset of the rcu_head structure within the base structure. + * Because the functions are not allowed in the low-order 4096 bytes of + * kernel virtual memory, offsets up to 4095 bytes can be accommodated. + * If the offset is larger than 4095 bytes, a compile-time error will + * be generated in __kfree_rcu(). If this error is triggered, you can + * either fall back to use of call_rcu() or rearrange the structure to + * position the rcu_head structure into the first 4096 bytes. + * + * Note that the allowable offset might decrease in the future, for example, + * to allow something like kmem_cache_free_rcu(). + */ +#define kfree_rcu(ptr, rcu_head) \ + __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) + #endif /* __LINUX_RCUPDATE_H */ -- cgit v1.2.2 From 29ce831000081dd757d3116bf774aafffc4b6b20 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 4 May 2011 16:31:03 +0300 Subject: rcu: provide rcu_virt_note_context_switch() function. Provide rcu_virt_note_context_switch() for vitalization use to note quiescent state during guest entry. Signed-off-by: Gleb Natapov Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 8 ++++++++ include/linux/rcutree.h | 10 ++++++++++ 2 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 30ebd7c8d874..52b3e0281fd0 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -99,6 +99,14 @@ static inline void rcu_note_context_switch(int cpu) rcu_preempt_note_context_switch(); } +/* + * Take advantage of the fact that there is only one CPU, which + * allows us to ignore virtualization-based context switches. + */ +static inline void rcu_virt_note_context_switch(int cpu) +{ +} + /* * Return the number of grace periods. */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 284dad10c55b..e65d06634dd8 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -35,6 +35,16 @@ extern void rcu_note_context_switch(int cpu); extern int rcu_needs_cpu(int cpu); extern void rcu_cpu_stall_reset(void); +/* + * Note a virtualization-based context switch. This is simply a + * wrapper around rcu_note_context_switch(), which allows TINY_RCU + * to save a few bytes. + */ +static inline void rcu_virt_note_context_switch(int cpu) +{ + rcu_note_context_switch(cpu); +} + #ifdef CONFIG_TREE_PREEMPT_RCU extern void exit_rcu(void); -- cgit v1.2.2 From a3a4a5acd3bd2f6f1e102e1f1b9d2e2bb320a7fd Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 5 May 2011 23:55:18 -0400 Subject: Regression: partial revert "tracing: Remove lock_depth from event entry" This partially reverts commit e6e1e2593592a8f6f6380496655d8c6f67431266. That commit changed the structure layout of the trace structure, which in turn broke PowerTOP (1.9x generation) quite badly. I appreciate not wanting to expose the variable in question, and PowerTOP was not using it, so I've replaced the variable with just a padding field - that way if in the future a new field is needed it can just use this padding field. Signed-off-by: Arjan van de Ven Signed-off-by: Linus Torvalds --- include/linux/ftrace_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 22b32af1b5ec..b5a550a39a70 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -37,6 +37,7 @@ struct trace_entry { unsigned char flags; unsigned char preempt_count; int pid; + int padding; }; #define FTRACE_MAX_EVENT \ -- cgit v1.2.2 From 880ffb5c6c5c8c8c6efd9efe9355317322b4603b Mon Sep 17 00:00:00 2001 From: Wanlong Gao Date: Thu, 5 May 2011 07:55:36 +0800 Subject: driver core: Add the device driver-model structures to kerneldoc Add the comments to the structure bus_type, device_driver, device, class to device.h for generating the driver-model kerneldoc. With another patch these all removed from the files in Documentation/driver-model/ since they are out of date. That will keep things up to date and provide a better way to document this stuff. Signed-off-by: Wanlong Gao Acked-by: Harry Wei Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 154 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 151 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index 2215d013ca96..42365067a836 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -47,6 +47,38 @@ extern int __must_check bus_create_file(struct bus_type *, struct bus_attribute *); extern void bus_remove_file(struct bus_type *, struct bus_attribute *); +/** + * struct bus_type - The bus type of the device + * + * @name: The name of the bus. + * @bus_attrs: Default attributes of the bus. + * @dev_attrs: Default attributes of the devices on the bus. + * @drv_attrs: Default attributes of the device drivers on the bus. + * @match: Called, perhaps multiple times, whenever a new device or driver + * is added for this bus. It should return a nonzero value if the + * given device can be handled by the given driver. + * @uevent: Called when a device is added, removed, or a few other things + * that generate uevents to add the environment variables. + * @probe: Called when a new device or driver add to this bus, and callback + * the specific driver's probe to initial the matched device. + * @remove: Called when a device removed from this bus. + * @shutdown: Called at shut-down time to quiesce the device. + * @suspend: Called when a device on this bus wants to go to sleep mode. + * @resume: Called to bring a device on this bus out of sleep mode. + * @pm: Power management operations of this bus, callback the specific + * device driver's pm-ops. + * @p: The private data of the driver core, only the driver core can + * touch this. + * + * A bus is a channel between the processor and one or more devices. For the + * purposes of the device model, all devices are connected via a bus, even if + * it is an internal, virtual, "platform" bus. Buses can plug into each other. + * A USB controller is usually a PCI device, for example. The device model + * represents the actual connections between buses and the devices they control. + * A bus is represented by the bus_type structure. It contains the name, the + * default attributes, the bus' methods, PM operations, and the driver core's + * private data. + */ struct bus_type { const char *name; struct bus_attribute *bus_attrs; @@ -119,6 +151,37 @@ extern int bus_unregister_notifier(struct bus_type *bus, extern struct kset *bus_get_kset(struct bus_type *bus); extern struct klist *bus_get_device_klist(struct bus_type *bus); +/** + * struct device_driver - The basic device driver structure + * @name: Name of the device driver. + * @bus: The bus which the device of this driver belongs to. + * @owner: The module owner. + * @mod_name: Used for built-in modules. + * @suppress_bind_attrs: Disables bind/unbind via sysfs. + * @of_match_table: The open firmware table. + * @probe: Called to query the existence of a specific device, + * whether this driver can work with it, and bind the driver + * to a specific device. + * @remove: Called when the device is removed from the system to + * unbind a device from this driver. + * @shutdown: Called at shut-down time to quiesce the device. + * @suspend: Called to put the device to sleep mode. Usually to a + * low power state. + * @resume: Called to bring a device from sleep mode. + * @groups: Default attributes that get created by the driver core + * automatically. + * @pm: Power management operations of the device which matched + * this driver. + * @p: Driver core's private data, no one other than the driver + * core can touch this. + * + * The device driver-model tracks all of the drivers known to the system. + * The main reason for this tracking is to enable the driver core to match + * up drivers with new devices. Once drivers are known objects within the + * system, however, a number of other things become possible. Device drivers + * can export information and configuration variables that are independent + * of any specific device. + */ struct device_driver { const char *name; struct bus_type *bus; @@ -185,8 +248,34 @@ struct device *driver_find_device(struct device_driver *drv, struct device *start, void *data, int (*match)(struct device *dev, void *data)); -/* - * device classes +/** + * struct class - device classes + * @name: Name of the class. + * @owner: The module owner. + * @class_attrs: Default attributes of this class. + * @dev_attrs: Default attributes of the devices belong to the class. + * @dev_bin_attrs: Default binary attributes of the devices belong to the class. + * @dev_kobj: The kobject that represents this class and links it into the hierarchy. + * @dev_uevent: Called when a device is added, removed from this class, or a + * few other things that generate uevents to add the environment + * variables. + * @devnode: Callback to provide the devtmpfs. + * @class_release: Called to release this class. + * @dev_release: Called to release the device. + * @suspend: Used to put the device to sleep mode, usually to a low power + * state. + * @resume: Used to bring the device from the sleep mode. + * @ns_type: Callbacks so sysfs can detemine namespaces. + * @namespace: Namespace of the device belongs to this class. + * @pm: The default device power management operations of this class. + * @p: The private data of the driver core, no one other than the + * driver core can touch this. + * + * A class is a higher-level view of a device that abstracts out low-level + * implementation details. Drivers may see a SCSI disk or an ATA disk, but, + * at the class level, they are all simply disks. Classes allow user space + * to work with devices based on what they do, rather than how they are + * connected or how they work. */ struct class { const char *name; @@ -401,6 +490,65 @@ struct device_dma_parameters { unsigned long segment_boundary_mask; }; +/** + * struct device - The basic device structure + * @parent: The device's "parent" device, the device to which it is attached. + * In most cases, a parent device is some sort of bus or host + * controller. If parent is NULL, the device, is a top-level device, + * which is not usually what you want. + * @p: Holds the private data of the driver core portions of the device. + * See the comment of the struct device_private for detail. + * @kobj: A top-level, abstract class from which other classes are derived. + * @init_name: Initial name of the device. + * @type: The type of device. + * This identifies the device type and carries type-specific + * information. + * @mutex: Mutex to synchronize calls to its driver. + * @bus: Type of bus device is on. + * @driver: Which driver has allocated this + * @platform_data: Platform data specific to the device. + * Example: For devices on custom boards, as typical of embedded + * and SOC based hardware, Linux often uses platform_data to point + * to board-specific structures describing devices and how they + * are wired. That can include what ports are available, chip + * variants, which GPIO pins act in what additional roles, and so + * on. This shrinks the "Board Support Packages" (BSPs) and + * minimizes board-specific #ifdefs in drivers. + * @power: For device power management. + * See Documentation/power/devices.txt for details. + * @pwr_domain: Provide callbacks that are executed during system suspend, + * hibernation, system resume and during runtime PM transitions + * along with subsystem-level and driver-level callbacks. + * @numa_node: NUMA node this device is close to. + * @dma_mask: Dma mask (if dma'ble device). + * @coherent_dma_mask: Like dma_mask, but for alloc_coherent mapping as not all + * hardware supports 64-bit addresses for consistent allocations + * such descriptors. + * @dma_parms: A low level driver may set these to teach IOMMU code about + * segment limitations. + * @dma_pools: Dma pools (if dma'ble device). + * @dma_mem: Internal for coherent mem override. + * @archdata: For arch-specific additions. + * @of_node: Associated device tree node. + * @of_match: Matching of_device_id from driver. + * @devt: For creating the sysfs "dev". + * @devres_lock: Spinlock to protect the resource of the device. + * @devres_head: The resources list of the device. + * @knode_class: The node used to add the device to the class list. + * @class: The class of the device. + * @groups: Optional attribute groups. + * @release: Callback to free the device after all references have + * gone away. This should be set by the allocator of the + * device (i.e. the bus driver that discovered the device). + * + * At the lowest level, every device in a Linux system is represented by an + * instance of struct device. The device structure contains the information + * that the device model core needs to model the system. Most subsystems, + * however, track additional information about the devices they host. As a + * result, it is rare for devices to be represented by bare device structures; + * instead, that structure, like kobject structures, is usually embedded within + * a higher-level representation of the device. + */ struct device { struct device *parent; @@ -611,7 +759,7 @@ extern int (*platform_notify)(struct device *dev); extern int (*platform_notify_remove)(struct device *dev); -/** +/* * get_device - atomically increment the reference count for the device. * */ -- cgit v1.2.2 From 1231f0baa547a541a7481119323b7f964dda4788 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Mar 2011 18:05:02 +0800 Subject: net,rcu: convert call_rcu(sctp_local_addr_free) to kfree_rcu() The rcu callback sctp_local_addr_free() just calls a kfree(), so we use kfree_rcu() instead of the call_rcu(sctp_local_addr_free). Signed-off-by: Lai Jiangshan Acked-by: David S. Miller Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/net/sctp/sctp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 505845ddb0be..01e094c6d0ae 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -115,7 +115,6 @@ * sctp/protocol.c */ extern struct sock *sctp_get_ctl_sock(void); -extern void sctp_local_addr_free(struct rcu_head *head); extern int sctp_copy_local_addr_list(struct sctp_bind_addr *, sctp_scope_t, gfp_t gfp, int flags); -- cgit v1.2.2 From 2bbd4492552867053b5a618a2474297e2b1c355d Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 6 May 2011 23:47:53 +0200 Subject: drm: mm: fix debug output The looping helper didn't do anything due to a superficial semicolon. Furthermore one of the two dump functions suffered from copy&paste fail. While staring at the code I've also noticed that the replace helper (currently unused) is a bit broken. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- include/drm/drm_mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index c2f93a8ae2e1..564b14aa7e16 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -86,7 +86,7 @@ static inline bool drm_mm_initialized(struct drm_mm *mm) } #define drm_mm_for_each_node(entry, mm) list_for_each_entry(entry, \ &(mm)->head_node.node_list, \ - node_list); + node_list) #define drm_mm_for_each_scanned_node_reverse(entry, n, mm) \ for (entry = (mm)->prev_scanned_node, \ next = entry ? list_entry(entry->node_list.next, \ -- cgit v1.2.2 From a09a79f66874c905af35d5bb5e5f2fdc7b6b894d Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 9 May 2011 13:01:09 +0200 Subject: Don't lock guardpage if the stack is growing up Linux kernel excludes guard page when performing mlock on a VMA with down-growing stack. However, some architectures have up-growing stack and locking the guard page should be excluded in this case too. This patch fixes lvm2 on PA-RISC (and possibly other architectures with up-growing stack). lvm2 calculates number of used pages when locking and when unlocking and reports an internal error if the numbers mismatch. [ Patch changed fairly extensively to also fix /proc//maps for the grows-up case, and to move things around a bit to clean it all up and share the infrstructure with the /proc bits. Tested on ia64 that has both grow-up and grow-down segments - Linus ] Signed-off-by: Mikulas Patocka Tested-by: Tony Luck Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- include/linux/mm.h | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2348db26bc3d..6507dde38b16 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1011,11 +1011,33 @@ int set_page_dirty_lock(struct page *page); int clear_page_dirty_for_io(struct page *page); /* Is the vma a continuation of the stack vma above it? */ -static inline int vma_stack_continue(struct vm_area_struct *vma, unsigned long addr) +static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr) { return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN); } +static inline int stack_guard_page_start(struct vm_area_struct *vma, + unsigned long addr) +{ + return (vma->vm_flags & VM_GROWSDOWN) && + (vma->vm_start == addr) && + !vma_growsdown(vma->vm_prev, addr); +} + +/* Is the vma a continuation of the stack vma below it? */ +static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr) +{ + return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP); +} + +static inline int stack_guard_page_end(struct vm_area_struct *vma, + unsigned long addr) +{ + return (vma->vm_flags & VM_GROWSUP) && + (vma->vm_end == addr) && + !vma_growsup(vma->vm_next, addr); +} + extern unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long len); -- cgit v1.2.2 From a9bb79128aa659f97b774b97c9bb1bdc74444595 Mon Sep 17 00:00:00 2001 From: "Hefty, Sean" Date: Mon, 9 May 2011 22:06:10 -0700 Subject: RDMA/cma: Add an ID_REUSEADDR option Lustre requires that clients bind to a privileged port number before connecting to a remote server. On larger clusters (typically more than about 1000 nodes), the number of privileged ports is exhausted, resulting in lustre being unusable. To handle this, we add support for reusable addresses to the rdma_cm. This mimics the behavior of the socket option SO_REUSEADDR. A user may set an rdma_cm_id to reuse an address before calling rdma_bind_addr() (explicitly or implicitly). If set, other rdma_cm_id's may be bound to the same address, provided that they all have reuse enabled, and there are no active listens. If rdma_listen() is called on an rdma_cm_id that has reuse enabled, it will only succeed if there are no other id's bound to that same address. The reuse option is exported to user space. The behavior of the kernel reuse implementation was verified against that given by sockets. This patch is derived from a path by Ira Weiny Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- include/rdma/rdma_cm.h | 10 ++++++++++ include/rdma/rdma_user_cm.h | 5 +++-- 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 4fae90304648..169f7a53fb0c 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -329,4 +329,14 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr); */ void rdma_set_service_type(struct rdma_cm_id *id, int tos); +/** + * rdma_set_reuseaddr - Allow the reuse of local addresses when binding + * the rdma_cm_id. + * @id: Communication identifier to configure. + * @reuse: Value indicating if the bound address is reusable. + * + * Reuse must be set before an address is bound to the id. + */ +int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse); + #endif /* RDMA_CM_H */ diff --git a/include/rdma/rdma_user_cm.h b/include/rdma/rdma_user_cm.h index 1d165022c02d..fc82c1896f75 100644 --- a/include/rdma/rdma_user_cm.h +++ b/include/rdma/rdma_user_cm.h @@ -221,8 +221,9 @@ enum { /* Option details */ enum { - RDMA_OPTION_ID_TOS = 0, - RDMA_OPTION_IB_PATH = 1 + RDMA_OPTION_ID_TOS = 0, + RDMA_OPTION_ID_REUSEADDR = 1, + RDMA_OPTION_IB_PATH = 1 }; struct rdma_ucm_set_option { -- cgit v1.2.2 From d0c49bf391b2e230a8f3ae4486da7df440f1216d Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 9 May 2011 22:23:57 -0700 Subject: RDMA/iwcm: Get rid of enum iw_cm_event_status The IW_CM_EVENT_STATUS_xxx values were used in only a couple of places; cma.c uses -Exxx values instead, and so do the amso1100, cxgb3 and cxgb4 drivers -- only nes was using the enum values (with the mild consequence that all nes connection failures were treated as generic errors rather than reported as timeouts or rejections). We can fix this confusion by getting rid of enum iw_cm_event_status and using a plain int for struct iw_cm_event.status, and converting nes to use -Exxx as the other iWARP drivers do. This also gets rid of the warning drivers/infiniband/core/cma.c: In function 'cma_iw_handler': drivers/infiniband/core/cma.c:1333:3: warning: case value '4294967185' not in enumerated type 'enum iw_cm_event_status' drivers/infiniband/core/cma.c:1336:3: warning: case value '4294967186' not in enumerated type 'enum iw_cm_event_status' drivers/infiniband/core/cma.c:1332:3: warning: case value '4294967192' not in enumerated type 'enum iw_cm_event_status' Signed-off-by: Roland Dreier Reviewed-by: Steve Wise Reviewed-by: Sean Hefty Reviewed-by: Faisal Latif --- include/rdma/iw_cm.h | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'include') diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h index cbb822e8d791..2d0191c90f9e 100644 --- a/include/rdma/iw_cm.h +++ b/include/rdma/iw_cm.h @@ -46,18 +46,9 @@ enum iw_cm_event_type { IW_CM_EVENT_CLOSE /* close complete */ }; -enum iw_cm_event_status { - IW_CM_EVENT_STATUS_OK = 0, /* request successful */ - IW_CM_EVENT_STATUS_ACCEPTED = 0, /* connect request accepted */ - IW_CM_EVENT_STATUS_REJECTED, /* connect request rejected */ - IW_CM_EVENT_STATUS_TIMEOUT, /* the operation timed out */ - IW_CM_EVENT_STATUS_RESET, /* reset from remote peer */ - IW_CM_EVENT_STATUS_EINVAL, /* asynchronous failure for bad parm */ -}; - struct iw_cm_event { enum iw_cm_event_type event; - enum iw_cm_event_status status; + int status; struct sockaddr_in local_addr; struct sockaddr_in remote_addr; void *private_data; -- cgit v1.2.2 From 7a4f0761fce32ff4918a7c23b08db564ad33092d Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Tue, 3 May 2011 22:09:31 +0200 Subject: IPVS: init and cleanup restructuring DESCRIPTION This patch tries to restore the initial init and cleanup sequences that was before namspace patch. Netns also requires action when net devices unregister which has never been implemented. I.e this patch also covers when a device moves into a network namespace, and has to be released. IMPLEMENTATION The number of calls to register_pernet_device have been reduced to one for the ip_vs.ko Schedulers still have their own calls. This patch adds a function __ip_vs_service_cleanup() and an enable flag for the netfilter hooks. The nf hooks will be enabled when the first service is loaded and never disabled again, except when a namespace exit starts. Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov [horms@verge.net.au: minor edit to changelog] Signed-off-by: Simon Horman --- include/net/ip_vs.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index d516f00c8e0f..86aefed6140b 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -791,6 +791,7 @@ struct ip_vs_app { /* IPVS in network namespace */ struct netns_ipvs { int gen; /* Generation */ + int enable; /* enable like nf_hooks do */ /* * Hash table: for real service lookups */ @@ -1089,6 +1090,22 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp) atomic_inc(&ctl_cp->n_control); } +/* + * IPVS netns init & cleanup functions + */ +extern int __ip_vs_estimator_init(struct net *net); +extern int __ip_vs_control_init(struct net *net); +extern int __ip_vs_protocol_init(struct net *net); +extern int __ip_vs_app_init(struct net *net); +extern int __ip_vs_conn_init(struct net *net); +extern int __ip_vs_sync_init(struct net *net); +extern void __ip_vs_conn_cleanup(struct net *net); +extern void __ip_vs_app_cleanup(struct net *net); +extern void __ip_vs_protocol_cleanup(struct net *net); +extern void __ip_vs_control_cleanup(struct net *net); +extern void __ip_vs_estimator_cleanup(struct net *net); +extern void __ip_vs_sync_cleanup(struct net *net); +extern void __ip_vs_service_cleanup(struct net *net); /* * IPVS application functions -- cgit v1.2.2 From 43a4dea4c9d44baae38ddc14b9b6d86fde4c8b88 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 9 May 2011 19:36:38 +0000 Subject: xfrm: Assign the inner mode output function to the dst entry As it is, we assign the outer modes output function to the dst entry when we create the xfrm bundle. This leads to two problems on interfamily scenarios. We might insert ipv4 packets into ip6_fragment when called from xfrm6_output. The system crashes if we try to fragment an ipv4 packet with ip6_fragment. This issue was introduced with git commit ad0081e4 (ipv6: Fragment locally generated tunnel-mode IPSec6 packets as needed). The second issue is, that we might insert ipv4 packets in netfilter6 and vice versa on interfamily scenarios. With this patch we assign the inner mode output function to the dst entry when we create the xfrm bundle. So xfrm4_output/xfrm6_output from the inner mode is used and the right fragmentation and netfilter functions are called. We switch then to outer mode with the output_finish functions. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- include/net/xfrm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 6ae4bc5ce8a7..20afeaa39395 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -324,6 +324,7 @@ struct xfrm_state_afinfo { int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n); int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n); int (*output)(struct sk_buff *skb); + int (*output_finish)(struct sk_buff *skb); int (*extract_input)(struct xfrm_state *x, struct sk_buff *skb); int (*extract_output)(struct xfrm_state *x, @@ -1454,6 +1455,7 @@ static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) extern int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm4_output(struct sk_buff *skb); +extern int xfrm4_output_finish(struct sk_buff *skb); extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family); extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family); extern int xfrm6_extract_header(struct sk_buff *skb); @@ -1470,6 +1472,7 @@ extern __be32 xfrm6_tunnel_spi_lookup(struct net *net, xfrm_address_t *saddr); extern int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm6_output(struct sk_buff *skb); +extern int xfrm6_output_finish(struct sk_buff *skb); extern int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr); -- cgit v1.2.2 From 2e711c04dbbf7a7732a3f7073b1fc285d12b369d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 26 Apr 2011 19:15:07 +0200 Subject: PM: Remove sysdev suspend, resume and shutdown operations Since suspend, resume and shutdown operations in struct sysdev_class and struct sysdev_driver are not used any more, remove them. Also drop sysdev_suspend(), sysdev_resume() and sysdev_shutdown() used for executing those operations and modify all of their users accordingly. This reduces kernel code size quite a bit and reduces its complexity. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman --- include/linux/device.h | 7 ------- include/linux/pm.h | 8 -------- include/linux/sysdev.h | 11 ----------- 3 files changed, 26 deletions(-) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index ab8dfc095709..ea9db9b0f248 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -633,13 +633,6 @@ static inline int devtmpfs_mount(const char *mountpoint) { return 0; } /* drivers/base/power/shutdown.c */ extern void device_shutdown(void); -#ifndef CONFIG_ARCH_NO_SYSDEV_OPS -/* drivers/base/sys.c */ -extern void sysdev_shutdown(void); -#else -static inline void sysdev_shutdown(void) { } -#endif - /* debugging and troubleshooting/diagnostic helpers. */ extern const char *dev_driver_string(const struct device *dev); diff --git a/include/linux/pm.h b/include/linux/pm.h index 512e09177e57..3c053e2beb84 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -529,14 +529,6 @@ struct dev_power_domain { */ #ifdef CONFIG_PM_SLEEP -#ifndef CONFIG_ARCH_NO_SYSDEV_OPS -extern int sysdev_suspend(pm_message_t state); -extern int sysdev_resume(void); -#else -static inline int sysdev_suspend(pm_message_t state) { return 0; } -static inline int sysdev_resume(void) { return 0; } -#endif - extern void device_pm_lock(void); extern void dpm_resume_noirq(pm_message_t state); extern void dpm_resume_end(pm_message_t state); diff --git a/include/linux/sysdev.h b/include/linux/sysdev.h index dfb078db8ebb..d35e783a598c 100644 --- a/include/linux/sysdev.h +++ b/include/linux/sysdev.h @@ -34,12 +34,6 @@ struct sysdev_class { struct list_head drivers; struct sysdev_class_attribute **attrs; struct kset kset; -#ifndef CONFIG_ARCH_NO_SYSDEV_OPS - /* Default operations for these types of devices */ - int (*shutdown)(struct sys_device *); - int (*suspend)(struct sys_device *, pm_message_t state); - int (*resume)(struct sys_device *); -#endif }; struct sysdev_class_attribute { @@ -77,11 +71,6 @@ struct sysdev_driver { struct list_head entry; int (*add)(struct sys_device *); int (*remove)(struct sys_device *); -#ifndef CONFIG_ARCH_NO_SYSDEV_OPS - int (*shutdown)(struct sys_device *); - int (*suspend)(struct sys_device *, pm_message_t state); - int (*resume)(struct sys_device *); -#endif }; -- cgit v1.2.2 From 8f389a99b652aab5b42297280bd94d95933ad12f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 11 May 2011 15:13:32 -0700 Subject: mm: use alloc_bootmem_node_nopanic() on really needed path Stefan found nobootmem does not work on his system that has only 8M of RAM. This causes an early panic: BIOS-provided physical RAM map: BIOS-88: 0000000000000000 - 000000000009f000 (usable) BIOS-88: 0000000000100000 - 0000000000840000 (usable) bootconsole [earlyser0] enabled Notice: NX (Execute Disable) protection missing in CPU or disabled in BIOS! DMI not present or invalid. last_pfn = 0x840 max_arch_pfn = 0x100000 init_memory_mapping: 0000000000000000-0000000000840000 8MB LOWMEM available. mapped low ram: 0 - 00840000 low ram: 0 - 00840000 Zone PFN ranges: DMA 0x00000001 -> 0x00001000 Normal empty Movable zone start PFN for each node early_node_map[2] active PFN ranges 0: 0x00000001 -> 0x0000009f 0: 0x00000100 -> 0x00000840 BUG: Int 6: CR2 (null) EDI c034663c ESI (null) EBP c0329f38 ESP c0329ef4 EBX c0346380 EDX 00000006 ECX ffffffff EAX fffffff4 err (null) EIP c0353191 CS c0320060 flg 00010082 Stack: (null) c030c533 000007cd (null) c030c533 00000001 (null) (null) 00000003 0000083f 00000018 00000002 00000002 c0329f6c c03534d6 (null) (null) 00000100 00000840 (null) c0329f64 00000001 00001000 (null) Pid: 0, comm: swapper Not tainted 2.6.36 #5 Call Trace: [] ? 0xc02e3707 [] 0xc035e6e5 [] ? 0xc0353191 [] 0xc03534d6 [] 0xc034f1cd [] 0xc034a824 [] ? 0xc03513cb [] 0xc0349432 [] 0xc0349066 It turns out that we should ignore the low limit of 16M. Use alloc_bootmem_node_nopanic() in this case. [akpm@linux-foundation.org: less mess] Signed-off-by: Yinghai LU Reported-by: Stefan Hellermann Tested-by: Stefan Hellermann Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: [2.6.34+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index b8613e806aa9..01eca1794e14 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -111,6 +111,8 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, __alloc_bootmem_nopanic(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_node(pgdat, x) \ __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_node_nopanic(pgdat, x) \ + __alloc_bootmem_node_nopanic(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_node(pgdat, x) \ __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_node_nopanic(pgdat, x) \ -- cgit v1.2.2 From ee85c2e1454603ebb9f8d87223ac79dcdc87fa32 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 11 May 2011 15:13:34 -0700 Subject: mm: add alloc_pages_exact_nid() Add a alloc_pages_exact_nid() that allocates on a specific node. The naming is quite broken, but fixing that would need a larger renaming action. [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: tweak comment] Signed-off-by: Andi Kleen Cc: Michal Hocko Cc: Balbir Singh Cc: KOSAKI Motohiro Cc: Dave Hansen Cc: David Rientjes Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index bfb8f934521e..56d8fc87fbbc 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -353,6 +353,8 @@ extern unsigned long get_zeroed_page(gfp_t gfp_mask); void *alloc_pages_exact(size_t size, gfp_t gfp_mask); void free_pages_exact(void *virt, size_t size); +/* This is different from alloc_pages_exact_node !!! */ +void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); #define __get_free_page(gfp_mask) \ __get_free_pages((gfp_mask), 0) -- cgit v1.2.2 From 1d929b7a84438ad9012c5826f5617d79a3efcef1 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 11 May 2011 15:13:39 -0700 Subject: mm: tracing: add missing GFP flags to tracing include/linux/gfp.h and include/trace/events/gfpflags.h are out of sync. When tracing is enabled, certain flags are not recognised and the text output is less useful as a result. Add the missing flags. Signed-off-by: Mel Gorman Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/gfpflags.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h index e3615c093741..9fe3a36646e9 100644 --- a/include/trace/events/gfpflags.h +++ b/include/trace/events/gfpflags.h @@ -10,6 +10,7 @@ */ #define show_gfp_flags(flags) \ (flags) ? __print_flags(flags, "|", \ + {(unsigned long)GFP_TRANSHUGE, "GFP_TRANSHUGE"}, \ {(unsigned long)GFP_HIGHUSER_MOVABLE, "GFP_HIGHUSER_MOVABLE"}, \ {(unsigned long)GFP_HIGHUSER, "GFP_HIGHUSER"}, \ {(unsigned long)GFP_USER, "GFP_USER"}, \ @@ -32,6 +33,9 @@ {(unsigned long)__GFP_HARDWALL, "GFP_HARDWALL"}, \ {(unsigned long)__GFP_THISNODE, "GFP_THISNODE"}, \ {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \ - {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"} \ + {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \ + {(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \ + {(unsigned long)__GFP_NO_KSWAPD, "GFP_NO_KSWAPD"}, \ + {(unsigned long)__GFP_OTHER_NODE, "GFP_OTHER_NODE"} \ ) : "GFP_NOWAIT" -- cgit v1.2.2 From a75b9df9d3bfc3cd1083974c045ae31ce5f3434f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 May 2011 18:00:51 -0400 Subject: NFSv4.1: Ensure that layoutget uses the correct gfp modes Currently, writebacks may end up recursing back into the filesystem due to GFP_KERNEL direct reclaims in the pnfs subsystem. Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 890dce242639..7e371f7df9c4 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -233,6 +233,7 @@ struct nfs4_layoutget { struct nfs4_layoutget_args args; struct nfs4_layoutget_res res; struct pnfs_layout_segment **lsegpp; + gfp_t gfp_flags; }; struct nfs4_getdeviceinfo_args { -- cgit v1.2.2 From 3e51e3edfd81bfd9853ad7de91167e4ce33d0fe7 Mon Sep 17 00:00:00 2001 From: Samir Bellabes Date: Wed, 11 May 2011 18:18:05 +0200 Subject: sched: Remove unused parameters from sched_fork() and wake_up_new_task() sched_fork() and wake_up_new_task() are defined with a parameter 'unsigned long clone_flags', which is unused. This patch removes the parameters. Signed-off-by: Samir Bellabes Acked-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1305130685-1047-1-git-send-email-sam@synack.fr Signed-off-by: Ingo Molnar --- include/linux/sched.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6b4280b23ee6..12211e1666e2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2051,14 +2051,13 @@ extern void xtime_update(unsigned long ticks); extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); -extern void wake_up_new_task(struct task_struct *tsk, - unsigned long clone_flags); +extern void wake_up_new_task(struct task_struct *tsk); #ifdef CONFIG_SMP extern void kick_process(struct task_struct *tsk); #else static inline void kick_process(struct task_struct *tsk) { } #endif -extern void sched_fork(struct task_struct *p, int clone_flags); +extern void sched_fork(struct task_struct *p); extern void sched_dead(struct task_struct *p); extern void proc_caches_init(void); -- cgit v1.2.2 From 5db1256a5131d3b133946fa02ac9770a784e6eb2 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Thu, 12 May 2011 04:13:54 -0500 Subject: seqlock: Don't smp_rmb in seqlock reader spin loop Move the smp_rmb after cpu_relax loop in read_seqlock and add ACCESS_ONCE to make sure the test and return are consistent. A multi-threaded core in the lab didn't like the update from 2.6.35 to 2.6.36, to the point it would hang during boot when multiple threads were active. Bisection showed af5ab277ded04bd9bc6b048c5a2f0e7d70ef0867 (clockevents: Remove the per cpu tick skew) as the culprit and it is supported with stack traces showing xtime_lock waits including tick_do_update_jiffies64 and/or update_vsyscall. Experimentation showed the combination of cpu_relax and smp_rmb was significantly slowing the progress of other threads sharing the core, and this patch is effective in avoiding the hang. A theory is the rmb is affecting the whole core while the cpu_relax is causing a resource rebalance flush, together they cause an interfernce cadance that is unbroken when the seqlock reader has interrupts disabled. At first I was confused why the refactor in 3c22cd5709e8143444a6d08682a87f4c57902df3 (kernel: optimise seqlock) didn't affect this patch application, but after some study that affected seqcount not seqlock. The new seqcount was not factored back into the seqlock. I defer that the future. While the removal of the timer interrupt offset created contention for the xtime lock while a cpu does the additonal work to update the system clock, the seqlock implementation with the tight rmb spin loop goes back much further, and is just waiting for the right trigger. Cc: Signed-off-by: Milton Miller Cc: Cc: Linus Torvalds Cc: Andi Kleen Cc: Nick Piggin Cc: Benjamin Herrenschmidt Cc: Anton Blanchard Cc: Paul McKenney Acked-by: Eric Dumazet Link: http://lkml.kernel.org/r/%3Cseqlock-rmb%40mdm.bga.com%3E Signed-off-by: Thomas Gleixner --- include/linux/seqlock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index e98cd2e57194..06d69648fc86 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -88,12 +88,12 @@ static __always_inline unsigned read_seqbegin(const seqlock_t *sl) unsigned ret; repeat: - ret = sl->sequence; - smp_rmb(); + ret = ACCESS_ONCE(sl->sequence); if (unlikely(ret & 1)) { cpu_relax(); goto repeat; } + smp_rmb(); return ret; } -- cgit v1.2.2 From 698b368275c3fa98261159253cfc79653f9dffc6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 11 May 2011 14:49:36 -0700 Subject: fbcon: add lifetime refcount to opened frame buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This just adds the refcount and the new registration lock logic. It does not (for example) actually change the read/write/ioctl routines to actually use the frame buffer that was opened: those function still end up alway susing whatever the current frame buffer is at the time of the call. Without this, if something holds the frame buffer open over a framebuffer switch, the close() operation after the switch will access a fb_info that has been free'd by the unregistering of the old frame buffer. (The read/write/ioctl operations will normally not cause problems, because they will - illogically - pick up the new fbcon instead. But a switch that happens just as one of those is going on might see problems too, the window is just much smaller: one individual op rather than the whole open-close sequence.) This use-after-free is apparently fairly easily triggered by the Ubuntu 11.04 boot sequence. Acked-by: Tim Gardner Tested-by: Daniel J Blueman Tested-by: Anca Emanuel Cc: Bruno Prémont Cc: Alan Cox Cc: Paul Mundt Cc: Dave Airlie Cc: Andy Whitcroft Signed-off-by: Linus Torvalds --- include/linux/fb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fb.h b/include/linux/fb.h index df728c1c29ed..6a8274877171 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -832,6 +832,7 @@ struct fb_tile_ops { #define FBINFO_CAN_FORCE_OUTPUT 0x200000 struct fb_info { + atomic_t count; int node; int flags; struct mutex lock; /* Lock for open/release/ioctl funcs */ -- cgit v1.2.2 From ca06707022d6ba4744198a8ebbe4994786b0c613 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Fri, 6 May 2011 23:44:46 +0000 Subject: ipv6: restore correct ECN handling on TCP xmit Since commit e9df2e8fd8fbc9 (Use appropriate sock tclass setting for routing lookup) we lost ability to properly add ECN codemarks to ipv6 TCP frames. It seems like TCP_ECN_send() calls INET_ECN_xmit(), which only sets the ECN bit in the IPv4 ToS field (inet_sk(sk)->tos), but after the patch, what's checked is inet6_sk(sk)->tclass, which is a completely different field. Close bug https://bugzilla.kernel.org/show_bug.cgi?id=34322 [Eric Dumazet] : added the INET_ECN_dontxmit() fix and replace macros by inline functions for clarity. Signed-off-by: Steinar H. Gunderson Signed-off-by: Eric Dumazet Cc: YOSHIFUJI Hideaki Cc: Andrew Morton Signed-off-by: David S. Miller --- include/net/inet_ecn.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index 88bdd010d65d..2fa8d1341a0a 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -38,9 +38,19 @@ static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner) return outer; } -#define INET_ECN_xmit(sk) do { inet_sk(sk)->tos |= INET_ECN_ECT_0; } while (0) -#define INET_ECN_dontxmit(sk) \ - do { inet_sk(sk)->tos &= ~INET_ECN_MASK; } while (0) +static inline void INET_ECN_xmit(struct sock *sk) +{ + inet_sk(sk)->tos |= INET_ECN_ECT_0; + if (inet6_sk(sk) != NULL) + inet6_sk(sk)->tclass |= INET_ECN_ECT_0; +} + +static inline void INET_ECN_dontxmit(struct sock *sk) +{ + inet_sk(sk)->tos &= ~INET_ECN_MASK; + if (inet6_sk(sk) != NULL) + inet6_sk(sk)->tclass &= ~INET_ECN_MASK; +} #define IP6_ECN_flow_init(label) do { \ (label) &= ~htonl(INET_ECN_MASK << 20); \ -- cgit v1.2.2 From 47a150edc2ae734c0f4bf50aa19499e23b9a46f8 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Fri, 13 May 2011 04:27:54 +0100 Subject: Cache user_ns in struct cred If !CONFIG_USERNS, have current_user_ns() defined to (&init_user_ns). Get rid of _current_user_ns. This requires nsown_capable() to be defined in capability.c rather than as static inline in capability.h, so do that. Request_key needs init_user_ns defined at current_user_ns if !CONFIG_USERNS, so forward-declare that in cred.h if !CONFIG_USERNS at current_user_ns() define. Compile-tested with and without CONFIG_USERNS. Signed-off-by: Serge E. Hallyn [ This makes a huge performance difference for acl_permission_check(), up to 30%. And that is one of the hottest kernel functions for loads that are pathname-lookup heavy. ] Signed-off-by: Linus Torvalds --- include/linux/capability.h | 13 +------------ include/linux/cred.h | 10 ++++++++-- 2 files changed, 9 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/capability.h b/include/linux/capability.h index 16ee8b49a200..d4675af963fa 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -546,18 +546,7 @@ extern bool has_capability_noaudit(struct task_struct *t, int cap); extern bool capable(int cap); extern bool ns_capable(struct user_namespace *ns, int cap); extern bool task_ns_capable(struct task_struct *t, int cap); - -/** - * nsown_capable - Check superior capability to one's own user_ns - * @cap: The capability in question - * - * Return true if the current task has the given superior capability - * targeted at its own user namespace. - */ -static inline bool nsown_capable(int cap) -{ - return ns_capable(current_user_ns(), cap); -} +extern bool nsown_capable(int cap); /* audit system wants to get cap info from files as well */ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); diff --git a/include/linux/cred.h b/include/linux/cred.h index 9aeeb0ba2003..be16b61283cc 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -146,6 +146,7 @@ struct cred { void *security; /* subjective LSM security */ #endif struct user_struct *user; /* real user ID subscription */ + struct user_namespace *user_ns; /* cached user->user_ns */ struct group_info *group_info; /* supplementary groups for euid/fsgid */ struct rcu_head rcu; /* RCU deletion hook */ }; @@ -354,10 +355,15 @@ static inline void put_cred(const struct cred *_cred) #define current_fsgid() (current_cred_xxx(fsgid)) #define current_cap() (current_cred_xxx(cap_effective)) #define current_user() (current_cred_xxx(user)) -#define _current_user_ns() (current_cred_xxx(user)->user_ns) #define current_security() (current_cred_xxx(security)) -extern struct user_namespace *current_user_ns(void); +#ifdef CONFIG_USER_NS +#define current_user_ns() (current_cred_xxx(user_ns)) +#else +extern struct user_namespace init_user_ns; +#define current_user_ns() (&init_user_ns) +#endif + #define current_uid_gid(_uid, _gid) \ do { \ -- cgit v1.2.2 From a10e14667635dde504ed9e7ee851494c2cf2ae8e Mon Sep 17 00:00:00 2001 From: Vitalii Demianets Date: Thu, 12 May 2011 23:04:29 +0000 Subject: bonding,llc: Fix structure sizeof incompatibility for some PDUs With some combinations of arch/compiler (e.g. arm-linux-gcc) the sizeof operator on structure returns value greater than expected. In cases when the structure is used for mapping PDU fields it may lead to unexpected results (such as holes and alignment problems in skb data). __packed prevents this undesired behavior. Signed-off-by: Vitalii Demianets Signed-off-by: David S. Miller --- include/net/llc_pdu.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h index 75b8e2968c9b..f57e7d46a453 100644 --- a/include/net/llc_pdu.h +++ b/include/net/llc_pdu.h @@ -199,7 +199,7 @@ struct llc_pdu_sn { u8 ssap; u8 ctrl_1; u8 ctrl_2; -}; +} __packed; static inline struct llc_pdu_sn *llc_pdu_sn_hdr(struct sk_buff *skb) { @@ -211,7 +211,7 @@ struct llc_pdu_un { u8 dsap; u8 ssap; u8 ctrl_1; -}; +} __packed; static inline struct llc_pdu_un *llc_pdu_un_hdr(struct sk_buff *skb) { @@ -359,7 +359,7 @@ struct llc_xid_info { u8 fmt_id; /* always 0x81 for LLC */ u8 type; /* different if NULL/non-NULL LSAP */ u8 rw; /* sender receive window */ -}; +} __packed; /** * llc_pdu_init_as_xid_cmd - sets bytes 3, 4 & 5 of LLC header as XID @@ -415,7 +415,7 @@ struct llc_frmr_info { u8 curr_ssv; /* current send state variable val */ u8 curr_rsv; /* current receive state variable */ u8 ind_bits; /* indicator bits set with macro */ -}; +} __packed; extern void llc_pdu_set_cmd_rsp(struct sk_buff *skb, u8 type); extern void llc_pdu_set_pf_bit(struct sk_buff *skb, u8 bit_value); -- cgit v1.2.2 From 82a3242e11d9e63c8195be46c954efaefee35e22 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 12 May 2011 16:01:02 -0700 Subject: sysfs: remove "last sysfs file:" line from the oops messages On some arches (x86, sh, arm, unicore, powerpc) the oops message would print out the last sysfs file accessed. This was very useful in finding a number of sysfs and driver core bugs in the 2.5 and early 2.6 development days, but it has been a number of years since this file has actually helped in debugging anything that couldn't also be trivially determined from the stack traceback. So it's time to delete the line. This is good as we need all the space we can get for oops messages at times on consoles. Acked-by: Phil Carmody Acked-by: Ingo Molnar Cc: Andrew Morton Cc: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 30b881555fa5..c3acda60eee0 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -176,7 +176,6 @@ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, const unsigned char *name); struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd); void sysfs_put(struct sysfs_dirent *sd); -void sysfs_printk_last_file(void); /* Called to clear a ns tag when it is no longer valid */ void sysfs_exit_ns(enum kobj_ns_type type, const void *tag); @@ -348,10 +347,6 @@ static inline int __must_check sysfs_init(void) return 0; } -static inline void sysfs_printk_last_file(void) -{ -} - #endif /* CONFIG_SYSFS */ #endif /* _SYSFS_H_ */ -- cgit v1.2.2 From 8c414ff3f4dcdde228c6a668385218290d73a265 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 8 May 2011 18:50:20 +0100 Subject: clocksource: convert footbridge to generic i8253 clocksource Convert the footbridge isa-timer code to use generic i8253 clocksource. Acked-by: John Stultz Acked-by: Thomas Gleixner Signed-off-by: Russell King --- include/linux/clocksource.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index c37b21ad5a3b..f13469b3df86 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -341,4 +341,6 @@ static inline void update_vsyscall_tz(void) extern void timekeeping_notify(struct clocksource *clock); +extern int clocksource_i8253_init(void); + #endif /* _LINUX_CLOCKSOURCE_H */ -- cgit v1.2.2 From e1e8fb6a1ff3f9487e03a4cbf85b81d1316068ce Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 15 Apr 2011 03:02:49 +0000 Subject: fs: remove FS_COW_FL FS_COW_FL and FS_NOCOW_FL were newly introduced to control per file COW in btrfs, but FS_NOCOW_FL is sufficient. The fact is we don't have corresponding BTRFS_INODE_COW flag. COW is default, and FS_NOCOW_FL can be used to switch off COW for a single file. If we mount btrfs with nodatacow, a newly created file will be set with the FS_NOCOW_FL flag. So to turn on COW for it, we can just clear the FS_NOCOW_FL flag. Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index de9dd8119b71..56a41412903d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -365,7 +365,6 @@ struct inodes_stat_t { #define FS_EXTENT_FL 0x00080000 /* Extents */ #define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */ #define FS_NOCOW_FL 0x00800000 /* Do not cow file */ -#define FS_COW_FL 0x02000000 /* Cow file */ #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ #define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ -- cgit v1.2.2 From 752d2635ebb12b6122ba05775f7d1ccfef14b275 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 22 Apr 2011 11:03:57 +0100 Subject: drm: Take lock around probes for drm_fb_helper_hotplug_event We need to hold the dev->mode_config.mutex whilst detecting the output status. But we also need to drop it for the call into drm_fb_helper_single_fb_probe(), which indirectly acquires the lock when attaching the fbcon. Failure to do so exposes a race with normal output probing. Detected by adding some warnings that the mutex is held to the backend detect routines: [ 17.772456] WARNING: at drivers/gpu/drm/i915/intel_crt.c:471 intel_crt_detect+0x3e/0x373 [i915]() [ 17.772458] Hardware name: Latitude E6400 [ 17.772460] Modules linked in: .... [ 17.772582] Pid: 11, comm: kworker/0:1 Tainted: G W 2.6.38.4-custom.2 #8 [ 17.772584] Call Trace: [ 17.772591] [] ? warn_slowpath_common+0x78/0x8c [ 17.772603] [] ? intel_crt_detect+0x3e/0x373 [i915] [ 17.772612] [] ? drm_helper_probe_single_connector_modes+0xbf/0x2af [drm_kms_helper] [ 17.772619] [] ? drm_fb_helper_probe_connector_modes+0x39/0x4d [drm_kms_helper] [ 17.772625] [] ? drm_fb_helper_hotplug_event+0xa5/0xc3 [drm_kms_helper] [ 17.772633] [] ? output_poll_execute+0x146/0x17c [drm_kms_helper] [ 17.772638] [] ? cfq_init_queue+0x247/0x345 [ 17.772644] [] ? output_poll_execute+0x0/0x17c [drm_kms_helper] [ 17.772648] [] ? process_one_work+0x193/0x28e [ 17.772652] [] ? worker_thread+0xef/0x172 [ 17.772655] [] ? worker_thread+0x0/0x172 [ 17.772658] [] ? worker_thread+0x0/0x172 [ 17.772663] [] ? kthread+0x7a/0x82 [ 17.772668] [] ? kernel_thread_helper+0x4/0x10 [ 17.772671] [] ? kthread+0x0/0x82 [ 17.772674] [] ? kernel_thread_helper+0x0/0x10 Reported-by: Frederik Himpe References: https://bugs.freedesktop.org/show_bug.cgi?id=36394 Signed-off-by: Chris Wilson Signed-off-by: Dave Airlie --- include/drm/drm_fb_helper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h index ade09d7b4271..c99c3d3e7811 100644 --- a/include/drm/drm_fb_helper.h +++ b/include/drm/drm_fb_helper.h @@ -127,7 +127,7 @@ void drm_fb_helper_fill_fix(struct fb_info *info, uint32_t pitch, int drm_fb_helper_setcmap(struct fb_cmap *cmap, struct fb_info *info); -bool drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper); +int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper); bool drm_fb_helper_initial_config(struct drm_fb_helper *fb_helper, int bpp_sel); int drm_fb_helper_single_add_all_connectors(struct drm_fb_helper *fb_helper); int drm_fb_helper_debug_enter(struct fb_info *info); -- cgit v1.2.2 From 86f315bbb2374f1f077500ad131dd9b71856e697 Mon Sep 17 00:00:00 2001 From: Chris Ball Date: Mon, 16 May 2011 11:32:26 -0400 Subject: Revert "mmc: fix a race between card-detect rescan and clock-gate work instances" This reverts commit 26fc8775b51484d8c0a671198639c6d5ae60533e, which has been reported to cause boot/resume-time crashes for some users: https://bbs.archlinux.org/viewtopic.php?id=118751. Signed-off-by: Chris Ball Cc: --- include/linux/mmc/host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index eb792cb6d745..bcb793ec7374 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -183,6 +183,7 @@ struct mmc_host { struct work_struct clk_gate_work; /* delayed clock gate */ unsigned int clk_old; /* old clock value cache */ spinlock_t clk_lock; /* lock for clk fields */ + struct mutex clk_gate_mutex; /* mutex for clock gating */ #endif /* host specific block data */ -- cgit v1.2.2 From 2064af917b3ba7589070064ca4ed12cecd99a63c Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Fri, 29 Apr 2011 00:37:26 +0200 Subject: PM: Revert "driver core: platform_bus: allow runtime override of dev_pm_ops" The platform_bus_set_pm_ops() operation is deprecated in favor of the new device power domain infrastructre implemented in commit 7538e3db6e015e890825fbd9f8659952896ddd5b (PM: add support for device power domains) Signed-off-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- include/linux/platform_device.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index e0093e061b08..ede1a80e3358 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -150,9 +150,6 @@ extern struct platform_device *platform_create_bundle(struct platform_driver *dr struct resource *res, unsigned int n_res, const void *data, size_t size); -extern const struct dev_pm_ops * platform_bus_get_pm_ops(void); -extern void platform_bus_set_pm_ops(const struct dev_pm_ops *pm); - /* early platform driver interface */ struct early_platform_driver { const char *class_str; -- cgit v1.2.2 From f0201738b61b1adcf6b2c4719c5c415745014c1c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 12 Apr 2011 19:06:39 -0400 Subject: ftrace: Avoid recording mcount on .init sections directly The init and exit sections should not be traced and adding a call to mcount to them is a waste of text and instruction cache. Have the macro section attributes include notrace to ignore these functions for tracing from the build. Link: http://lkml.kernel.org/r/20110421023738.953028219@goodmis.org Signed-off-by: Steven Rostedt --- include/linux/init.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/init.h b/include/linux/init.h index 577671c55153..9146f39cdddf 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -79,29 +79,29 @@ #define __exitused __used #endif -#define __exit __section(.exit.text) __exitused __cold +#define __exit __section(.exit.text) __exitused __cold notrace /* Used for HOTPLUG */ -#define __devinit __section(.devinit.text) __cold +#define __devinit __section(.devinit.text) __cold notrace #define __devinitdata __section(.devinit.data) #define __devinitconst __section(.devinit.rodata) -#define __devexit __section(.devexit.text) __exitused __cold +#define __devexit __section(.devexit.text) __exitused __cold notrace #define __devexitdata __section(.devexit.data) #define __devexitconst __section(.devexit.rodata) /* Used for HOTPLUG_CPU */ -#define __cpuinit __section(.cpuinit.text) __cold +#define __cpuinit __section(.cpuinit.text) __cold notrace #define __cpuinitdata __section(.cpuinit.data) #define __cpuinitconst __section(.cpuinit.rodata) -#define __cpuexit __section(.cpuexit.text) __exitused __cold +#define __cpuexit __section(.cpuexit.text) __exitused __cold notrace #define __cpuexitdata __section(.cpuexit.data) #define __cpuexitconst __section(.cpuexit.rodata) /* Used for MEMORY_HOTPLUG */ -#define __meminit __section(.meminit.text) __cold +#define __meminit __section(.meminit.text) __cold notrace #define __meminitdata __section(.meminit.data) #define __meminitconst __section(.meminit.rodata) -#define __memexit __section(.memexit.text) __exitused __cold +#define __memexit __section(.memexit.text) __exitused __cold notrace #define __memexitdata __section(.memexit.data) #define __memexitconst __section(.memexit.rodata) -- cgit v1.2.2 From 9937a5e2f32892db0dbeefc2b3bc74b3ae3ea9c7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 17 May 2011 11:04:44 +0200 Subject: scsi: remove performance regression due to async queue run Commit c21e6beb removed our queue request_fn re-enter protection, and defaulted to always running the queues from kblockd to be safe. This was a known potential slow down, but should be safe. Unfortunately this is causing big performance regressions for some, so we need to improve this logic. Looking into the details of the re-enter, the real issue is on requeue of requests. Requeue of requests upon seeing a BUSY condition from the device ends up re-running the queue, causing traces like this: scsi_request_fn() scsi_dispatch_cmd() scsi_queue_insert() __scsi_queue_insert() scsi_run_queue() scsi_request_fn() ... potentially causing the issue we want to avoid. So special case the requeue re-run of the queue, but improve it to offload the entire run of local queue and starved queue from a single workqueue callback. This is a lot better than potentially kicking off a workqueue run for each device seen. This also fixes the issue of the local device going into recursion, since the above mentioned commit never moved that queue run out of line. Signed-off-by: Jens Axboe --- include/scsi/scsi_device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 2d3ec5094685..dd82e02ddde3 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -169,6 +169,7 @@ struct scsi_device { sdev_dev; struct execute_work ew; /* used to get process context on put */ + struct work_struct requeue_work; struct scsi_dh_data *scsi_dh_data; enum scsi_device_state sdev_state; -- cgit v1.2.2 From a144c6a6c924aa1da04dd77fb84b89927354fdff Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 6 May 2011 20:09:42 +0200 Subject: PM: Print a warning if firmware is requested when tasks are frozen Some drivers erroneously use request_firmware() from their ->resume() (or ->thaw(), or ->restore()) callbacks, which is not going to work unless the firmware has been built in. This causes system resume to stall until the firmware-loading timeout expires, which makes users think that the resume has failed and reboot their machines unnecessarily. For this reason, make _request_firmware() print a warning and return immediately with error code if it has been called when tasks are frozen and it's impossible to start any new usermode helpers. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman Reviewed-by: Valdis Kletnieks --- include/linux/kmod.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 6efd7a78de6a..7f3dbcb78116 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -111,7 +111,12 @@ call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait) extern void usermodehelper_init(void); +#ifdef CONFIG_PM_SLEEP extern int usermodehelper_disable(void); extern void usermodehelper_enable(void); +extern bool usermodehelper_is_disabled(void); +#else +static inline bool usermodehelper_is_disabled(void) { return false; } +#endif #endif /* __LINUX_KMOD_H__ */ -- cgit v1.2.2 From 13d53f8775c6a00b070a3eef6833795412eb7fcd Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Tue, 10 May 2011 21:27:34 +0200 Subject: kmod: always provide usermodehelper_disable() We need to prevent kernel-forked processes during system poweroff. Such processes try to access the filesystem whose disks we are trying to shutdown at the same time. This causes delays and exceptions in the storage drivers. A follow-up patch will add these calls and need usermodehelper_disable() also on systems without suspend support. Signed-off-by: Kay Sievers Signed-off-by: Rafael J. Wysocki --- include/linux/kmod.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 7f3dbcb78116..310231823852 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -111,12 +111,8 @@ call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait) extern void usermodehelper_init(void); -#ifdef CONFIG_PM_SLEEP extern int usermodehelper_disable(void); extern void usermodehelper_enable(void); extern bool usermodehelper_is_disabled(void); -#else -static inline bool usermodehelper_is_disabled(void) { return false; } -#endif #endif /* __LINUX_KMOD_H__ */ -- cgit v1.2.2 From 91e7c75ba93c48a82670d630b9daac92ff70095d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 17 May 2011 23:26:00 +0200 Subject: PM: Allow drivers to allocate memory from .prepare() callbacks safely If device drivers allocate substantial amounts of memory (above 1 MB) in their hibernate .freeze() callbacks (or in their legacy suspend callbcks during hibernation), the subsequent creation of hibernate image may fail due to the lack of memory. This is the case, because the drivers' .freeze() callbacks are executed after the hibernate memory preallocation has been carried out and the preallocated amount of memory may be too small to cover the new driver allocations. Unfortunately, the drivers' .prepare() callbacks also are executed after the hibernate memory preallocation has completed, so they are not suitable for allocating additional memory either. Thus the only way a driver can safely allocate memory during hibernation is to use a hibernate/suspend notifier. However, the notifiers are called before the freezing of user space and the drivers wanting to use them for allocating additional memory may not know how much memory needs to be allocated at that point. To let device drivers overcome this difficulty rework the hibernation sequence so that the memory preallocation is carried out after the drivers' .prepare() callbacks have been executed, so that the .prepare() callbacks can be used for allocating additional memory to be used by the drivers' .freeze() callbacks. Update documentation to match the new behavior of the code. Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/pm.h b/include/linux/pm.h index 3cc3e7e589f0..dce7c7148771 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -533,10 +533,14 @@ struct dev_power_domain { extern void device_pm_lock(void); extern void dpm_resume_noirq(pm_message_t state); extern void dpm_resume_end(pm_message_t state); +extern void dpm_resume(pm_message_t state); +extern void dpm_complete(pm_message_t state); extern void device_pm_unlock(void); extern int dpm_suspend_noirq(pm_message_t state); extern int dpm_suspend_start(pm_message_t state); +extern int dpm_suspend(pm_message_t state); +extern int dpm_prepare(pm_message_t state); extern void __suspend_report_result(const char *function, void *fn, int ret); -- cgit v1.2.2 From 6538df80194e305f1b78cafb556f4bb442f808b3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 17 May 2011 23:26:21 +0200 Subject: PM: Introduce generic prepare and complete callbacks for subsystems Introduce generic .prepare() and .complete() power management callbacks, currently missing, that can be used by subsystems and power domains and export them. Provide NULL definitions of all the generic system sleep callbacks for CONFIG_PM_SLEEP unset. Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/pm.h b/include/linux/pm.h index dce7c7148771..3160648ccdda 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -550,6 +550,16 @@ extern void __suspend_report_result(const char *function, void *fn, int ret); } while (0) extern int device_pm_wait_for_dev(struct device *sub, struct device *dev); + +extern int pm_generic_prepare(struct device *dev); +extern int pm_generic_suspend(struct device *dev); +extern int pm_generic_resume(struct device *dev); +extern int pm_generic_freeze(struct device *dev); +extern int pm_generic_thaw(struct device *dev); +extern int pm_generic_restore(struct device *dev); +extern int pm_generic_poweroff(struct device *dev); +extern void pm_generic_complete(struct device *dev); + #else /* !CONFIG_PM_SLEEP */ #define device_pm_lock() do {} while (0) @@ -566,6 +576,15 @@ static inline int device_pm_wait_for_dev(struct device *a, struct device *b) { return 0; } + +#define pm_generic_prepare NULL +#define pm_generic_suspend NULL +#define pm_generic_resume NULL +#define pm_generic_freeze NULL +#define pm_generic_thaw NULL +#define pm_generic_restore NULL +#define pm_generic_poweroff NULL +#define pm_generic_complete NULL #endif /* !CONFIG_PM_SLEEP */ /* How to reorder dpm_list after device_move() */ @@ -576,11 +595,4 @@ enum dpm_order { DPM_ORDER_DEV_LAST, }; -extern int pm_generic_suspend(struct device *dev); -extern int pm_generic_resume(struct device *dev); -extern int pm_generic_freeze(struct device *dev); -extern int pm_generic_thaw(struct device *dev); -extern int pm_generic_restore(struct device *dev); -extern int pm_generic_poweroff(struct device *dev); - #endif /* _LINUX_PM_H */ -- cgit v1.2.2 From f12a20fc9bfba4218ecbc4e40c8e08dc2a85dc99 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 17 May 2011 15:44:12 -0700 Subject: procfs: add stub for proc_mkdir_mode() Provide a stub for proc_mkdir_mode() when CONFIG_PROC_FS is not enabled, just like the stub for proc_mkdir(). Fixes this linux-next build error: drivers/net/wireless/airo.c:4504: error: implicit declaration of function 'proc_mkdir_mode' Signed-off-by: Randy Dunlap Cc: Stephen Rothwell Cc: Alexey Dobriyan Cc: "John W. Linville" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/proc_fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 838c1149251a..eaf4350c0f90 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -208,6 +208,8 @@ static inline struct proc_dir_entry *proc_symlink(const char *name, struct proc_dir_entry *parent,const char *dest) {return NULL;} static inline struct proc_dir_entry *proc_mkdir(const char *name, struct proc_dir_entry *parent) {return NULL;} +static inline struct proc_dir_entry *proc_mkdir_mode(const char *name, + mode_t mode, struct proc_dir_entry *parent) { return NULL; } static inline struct proc_dir_entry *create_proc_read_entry(const char *name, mode_t mode, struct proc_dir_entry *base, -- cgit v1.2.2 From fe12bc2c996d3e492b2920e32ac79f7bbae3e15d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 May 2011 12:48:00 +0200 Subject: genirq: Uninline and sanity check generic_handle_irq() generic_handle_irq() is missing a NULL pointer check for the result of irq_to_desc. This was a not a big problem, but we want to expose it to drivers, so we better have sanity checks in place. Add a return value as well, which indicates that the irq number was valid and the handler was invoked. Based on the pure code move from Jonathan Cameron. Signed-off-by: Thomas Gleixner Cc: Jonathan Cameron --- include/linux/irqdesc.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index c70b1aa4b93a..2d921b35212c 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -111,10 +111,7 @@ static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *de desc->handle_irq(irq, desc); } -static inline void generic_handle_irq(unsigned int irq) -{ - generic_handle_irq_desc(irq, irq_to_desc(irq)); -} +int generic_handle_irq(unsigned int irq); /* Test to see if a driver has successfully requested an irq */ static inline int irq_has_action(unsigned int irq) -- cgit v1.2.2 From 01294d82622d6d9d64bde8e4530c7e2c6dbb6ee6 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Wed, 18 May 2011 10:27:39 -0500 Subject: of: fix race when matching drivers If two drivers are probing devices at the same time, both will write their match table result to the dev->of_match cache at the same time. Only write the result if the device matches. In a thread titled "SBus devices sometimes detected, sometimes not", Meelis reported his SBus hme was not detected about 50% of the time. From the debug suggested by Grant it was obvious another driver matched some devices between the call to match the hme and the hme discovery failling. Reported-by: Meelis Roos Signed-off-by: Milton Miller [grant.likely: modified to only call of_match_device() once] Signed-off-by: Grant Likely --- include/linux/of_device.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/of_device.h b/include/linux/of_device.h index 8bfe6c1d4365..b33d68814a73 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -21,8 +21,12 @@ extern void of_device_make_bus_id(struct device *dev); static inline int of_driver_match_device(struct device *dev, const struct device_driver *drv) { - dev->of_match = of_match_device(drv->of_match_table, dev); - return dev->of_match != NULL; + const struct of_device_id *match; + + match = of_match_device(drv->of_match_table, dev); + if (match) + dev->of_match = match; + return match != NULL; } extern struct platform_device *of_dev_get(struct platform_device *dev); -- cgit v1.2.2 From b1608d69cb804e414d0887140ba08a9398e4e638 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 18 May 2011 11:19:24 -0600 Subject: drivercore: revert addition of of_match to struct device Commit b826291c, "drivercore/dt: add a match table pointer to struct device" added an of_match pointer to struct device to cache the of_match_table entry discovered at driver match time. This was unsafe because matching is not an atomic operation with probing a driver. If two or more drivers are attempted to be matched to a driver at the same time, then the cached matching entry pointer could get overwritten. This patch reverts the of_match cache pointer and reworks all users to call of_match_device() directly instead. Signed-off-by: Grant Likely --- include/linux/device.h | 1 - include/linux/of_device.h | 12 ++++++------ 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index ab8dfc095709..d08399db6e2c 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -442,7 +442,6 @@ struct device { struct dev_archdata archdata; struct device_node *of_node; /* associated device tree node */ - const struct of_device_id *of_match; /* matching of_device_id from driver */ dev_t devt; /* dev_t, creates the sysfs "dev" */ diff --git a/include/linux/of_device.h b/include/linux/of_device.h index b33d68814a73..ae5638480ef2 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -21,12 +21,7 @@ extern void of_device_make_bus_id(struct device *dev); static inline int of_driver_match_device(struct device *dev, const struct device_driver *drv) { - const struct of_device_id *match; - - match = of_match_device(drv->of_match_table, dev); - if (match) - dev->of_match = match; - return match != NULL; + return of_match_device(drv->of_match_table, dev) != NULL; } extern struct platform_device *of_dev_get(struct platform_device *dev); @@ -62,6 +57,11 @@ static inline int of_device_uevent(struct device *dev, static inline void of_device_node_put(struct device *dev) { } +static inline const struct of_device_id *of_match_device( + const struct of_device_id *matches, const struct device *dev) +{ + return NULL; +} #endif /* CONFIG_OF_DEVICE */ #endif /* _LINUX_OF_DEVICE_H */ -- cgit v1.2.2 From b448c4e3ae6d20108dba1d7833f2c0d3dbad87ce Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 29 Apr 2011 15:12:32 -0400 Subject: ftrace: Replace FTRACE_FL_NOTRACE flag with a hash of ignored functions To prepare for the accounting system that will allow multiple users of the function tracer, having the FTRACE_FL_NOTRACE as a flag in the dyn_trace record does not make sense. All ftrace_ops will soon have a hash of functions they should trace and not trace. By making a global hash of functions not to trace makes this easier for the transition. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 32047449b309..fe0a90a09243 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -149,7 +149,6 @@ enum { FTRACE_FL_FREE = (1 << 0), FTRACE_FL_FILTER = (1 << 1), FTRACE_FL_ENABLED = (1 << 2), - FTRACE_FL_NOTRACE = (1 << 3), }; struct dyn_ftrace { -- cgit v1.2.2 From 1cf41dd79993389b012e4542ab502ce36ae7343f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 29 Apr 2011 20:59:51 -0400 Subject: ftrace: Use hash instead for FTRACE_FL_FILTER When multiple users are allowed to have their own set of functions to trace, having the FTRACE_FL_FILTER flag will not be enough to handle the accounting of those users. Each user will need their own set of functions. Replace the FTRACE_FL_FILTER with a filter_hash instead. This is temporary until the rest of the function filtering accounting gets in. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index fe0a90a09243..52fc5d4e6edd 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -147,8 +147,7 @@ extern int ftrace_text_reserved(void *start, void *end); enum { FTRACE_FL_FREE = (1 << 0), - FTRACE_FL_FILTER = (1 << 1), - FTRACE_FL_ENABLED = (1 << 2), + FTRACE_FL_ENABLED = (1 << 1), }; struct dyn_ftrace { -- cgit v1.2.2 From f45948e898e7bc76a73a468796d2ce80dd040058 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 2 May 2011 12:29:25 -0400 Subject: ftrace: Create a global_ops to hold the filter and notrace hashes Combine the filter and notrace hashes to be accessed by a single entity, the global_ops. The global_ops is a ftrace_ops structure that is passed to different functions that can read or modify the filtering of the function tracer. The ftrace_ops structure was modified to hold a filter and notrace hashes so that later patches may allow each ftrace_ops to have its own set of rules to what functions may be filtered. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 52fc5d4e6edd..6658a51390fe 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -29,9 +29,15 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); +struct ftrace_hash; + struct ftrace_ops { - ftrace_func_t func; - struct ftrace_ops *next; + ftrace_func_t func; + struct ftrace_ops *next; +#ifdef CONFIG_DYNAMIC_FTRACE + struct ftrace_hash *notrace_hash; + struct ftrace_hash *filter_hash; +#endif }; extern int function_trace_stop; -- cgit v1.2.2 From ed926f9b35cda0988234c356e16a7cb30f4e5338 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 3 May 2011 13:25:24 -0400 Subject: ftrace: Use counters to enable functions to trace Every function has its own record that stores the instruction pointer and flags for the function to be traced. There are only two flags: enabled and free. The enabled flag states that tracing for the function has been enabled (actively traced), and the free flag states that the record no longer points to a function and can be used by new functions (loaded modules). These flags are now moved to the MSB of the flags (actually just the top 32bits). The rest of the bits (30 bits) are now used as a ref counter. Everytime a tracer register functions to trace, those functions will have its counter incremented. When tracing is enabled, to determine if a function should be traced, the counter is examined, and if it is non-zero it is set to trace. When a ftrace_ops is registered to trace functions, its hashes are examined. If the ftrace_ops filter_hash count is zero, then all functions are set to be traced, otherwise only the functions in the hash are to be traced. The exception to this is if a function is also in the ftrace_ops notrace_hash. Then that function's counter is not incremented for this ftrace_ops. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 6658a51390fe..ab1c46e70bb6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -37,6 +37,7 @@ struct ftrace_ops { #ifdef CONFIG_DYNAMIC_FTRACE struct ftrace_hash *notrace_hash; struct ftrace_hash *filter_hash; + unsigned long flags; #endif }; @@ -152,10 +153,13 @@ extern void unregister_ftrace_function_probe_all(char *glob); extern int ftrace_text_reserved(void *start, void *end); enum { - FTRACE_FL_FREE = (1 << 0), - FTRACE_FL_ENABLED = (1 << 1), + FTRACE_FL_ENABLED = (1 << 30), + FTRACE_FL_FREE = (1 << 31), }; +#define FTRACE_FL_MASK (0x3UL << 30) +#define FTRACE_REF_MAX ((1 << 30) - 1) + struct dyn_ftrace { union { unsigned long ip; /* address of mcount call-site */ -- cgit v1.2.2 From b848914ce39589d89ee0078a6d1ef452b464729e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 4 May 2011 09:27:52 -0400 Subject: ftrace: Implement separate user function filtering ftrace_ops that are registered to trace functions can now be agnostic to each other in respect to what functions they trace. Each ops has their own hash of the functions they want to trace and a hash to what they do not want to trace. A empty hash for the functions they want to trace denotes all functions should be traced that are not in the notrace hash. Cc: Paul E. McKenney Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ab1c46e70bb6..4609c0ece79a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -31,13 +31,18 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); struct ftrace_hash; +enum { + FTRACE_OPS_FL_ENABLED = 1 << 0, + FTRACE_OPS_FL_GLOBAL = 1 << 1, +}; + struct ftrace_ops { ftrace_func_t func; struct ftrace_ops *next; + unsigned long flags; #ifdef CONFIG_DYNAMIC_FTRACE struct ftrace_hash *notrace_hash; struct ftrace_hash *filter_hash; - unsigned long flags; #endif }; -- cgit v1.2.2 From cdbe61bfe70440939e457fb4a8d0995eaaed17de Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 5 May 2011 21:14:55 -0400 Subject: ftrace: Allow dynamically allocated function tracers Now that functions may be selected individually, it only makes sense that we should allow dynamically allocated trace structures to be traced. This will allow perf to allocate a ftrace_ops structure at runtime and use it to pick and choose which functions that structure will trace. Note, a dynamically allocated ftrace_ops will always be called indirectly instead of being called directly from the mcount in entry.S. This is because there's no safe way to prevent mcount from being preempted before calling the function, unless we modify every entry.S to do so (not likely). Thus, dynamically allocated functions will now be called by the ftrace_ops_list_func() that loops through the ops that are allocated if there are more than one op allocated at a time. This loop is protected with a preempt_disable. To determine if an ftrace_ops structure is allocated or not, a new util function was added to the kernel/extable.c called core_kernel_data(), which returns 1 if the address is between _sdata and _edata. Cc: Paul E. McKenney Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 1 + include/linux/kernel.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4609c0ece79a..caba694a62b6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -34,6 +34,7 @@ struct ftrace_hash; enum { FTRACE_OPS_FL_ENABLED = 1 << 0, FTRACE_OPS_FL_GLOBAL = 1 << 1, + FTRACE_OPS_FL_DYNAMIC = 1 << 2, }; struct ftrace_ops { diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 00cec4dc0ae2..f37ba716ef8b 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -283,6 +283,7 @@ extern char *get_options(const char *str, int nints, int *ints); extern unsigned long long memparse(const char *ptr, char **retptr); extern int core_kernel_text(unsigned long addr); +extern int core_kernel_data(unsigned long addr); extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); extern int func_ptr_is_kernel_text(void *ptr); -- cgit v1.2.2 From 936e074b286ae779f134312178dbab139ee7ea52 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 5 May 2011 22:54:01 -0400 Subject: ftrace: Modify ftrace_set_filter/notrace to take ops Since users of the function tracer can now pick and choose which functions they want to trace agnostically from other users of the function tracer, we need to pass the ops struct to the ftrace_set_filter() functions. The functions ftrace_set_global_filter() and ftrace_set_global_notrace() is added to keep the old filter functions which are used to modify the generic function tracers. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index caba694a62b6..9d88e1cb5dbb 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -179,7 +179,12 @@ struct dyn_ftrace { }; int ftrace_force_update(void); -void ftrace_set_filter(unsigned char *buf, int len, int reset); +void ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, + int len, int reset); +void ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, + int len, int reset); +void ftrace_set_global_filter(unsigned char *buf, int len, int reset); +void ftrace_set_global_notrace(unsigned char *buf, int len, int reset); int register_ftrace_command(struct ftrace_func_command *cmd); int unregister_ftrace_command(struct ftrace_func_command *cmd); -- cgit v1.2.2 From b4bc842802db3314f9a657094da0450a903ea619 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 7 Feb 2011 16:02:25 -0800 Subject: module: deal with alignment issues in built-in module versions On m68k natural alignment is 2-byte boundary but we are trying to align structures in __modver section on sizeof(void *) boundary. This causes trouble when we try to access elements in this section in array-like fashion when create "version" attributes for built-in modules. Moreover, as DaveM said, we can't reliably put structures into independent objects, put them into a special section, and then expect array access over them (via the section boundaries) after linking the objects together to just "work" due to variable alignment choices in different situations. The only solution that seems to work reliably is to make an array of plain pointers to the objects in question and put those pointers in the special section. Reported-by: Geert Uytterhoeven Signed-off-by: Dmitry Torokhov Signed-off-by: Rusty Russell --- include/linux/module.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index 5de42043dff0..4cfebc211695 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -174,10 +174,7 @@ extern struct module __this_module; #define MODULE_VERSION(_version) \ extern ssize_t __modver_version_show(struct module_attribute *, \ struct module *, char *); \ - static struct module_version_attribute __modver_version_attr \ - __used \ - __attribute__ ((__section__ ("__modver"),aligned(sizeof(void *)))) \ - = { \ + static struct module_version_attribute ___modver_attr = { \ .mattr = { \ .attr = { \ .name = "version", \ @@ -187,7 +184,10 @@ extern struct module __this_module; }, \ .module_name = KBUILD_MODNAME, \ .version = _version, \ - } + }; \ + static const struct module_version_attribute \ + __used __attribute__ ((__section__ ("__modver"))) \ + * __moduleparam_const __modver_attr = &___modver_attr #endif /* Optional firmware file (or files) needed by the module -- cgit v1.2.2 From 9b73a5840c7d5f77e5766626716df13787cb258c Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 7 Feb 2011 16:02:27 -0800 Subject: module: do not hide __modver_version_show declaration behind ifdef Doing so prevents the following warning from sparse: CHECK kernel/params.c kernel/params.c:817:9: warning: symbol '__modver_version_show' was not declared. Should it be static? since kernel/params.c is never compiled with MODULE being set. Signed-off-by: Dmitry Torokhov Signed-off-by: Rusty Russell --- include/linux/module.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index 4cfebc211695..23996ad147e7 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -64,6 +64,9 @@ struct module_version_attribute { const char *version; } __attribute__ ((__aligned__(sizeof(void *)))); +extern ssize_t __modver_version_show(struct module_attribute *, + struct module *, char *); + struct module_kobject { struct kobject kobj; @@ -172,8 +175,6 @@ extern struct module __this_module; #define MODULE_VERSION(_version) MODULE_INFO(version, _version) #else #define MODULE_VERSION(_version) \ - extern ssize_t __modver_version_show(struct module_attribute *, \ - struct module *, char *); \ static struct module_version_attribute ___modver_attr = { \ .mattr = { \ .attr = { \ -- cgit v1.2.2 From a288bd651f4180c224cfddf837a0416157a36661 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Thu, 19 May 2011 16:55:25 -0600 Subject: module: remove 64 bit alignment padding from struct module with CONFIG_TRACE* Reorder struct module to remove 24 bytes of alignment padding on 64 bit builds when the CONFIG_TRACE options are selected. This allows the structure to fit into one fewer cache lines, and its size drops from 592 to 568 on x86_64. Signed-off-by: Richard Kennedy Signed-off-by: Rusty Russell --- include/linux/module.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index 23996ad147e7..65cc6cc73ca8 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -368,34 +368,35 @@ struct module struct module_notes_attrs *notes_attrs; #endif + /* The command line arguments (may be mangled). People like + keeping pointers to this stuff */ + char *args; + #ifdef CONFIG_SMP /* Per-cpu data. */ void __percpu *percpu; unsigned int percpu_size; #endif - /* The command line arguments (may be mangled). People like - keeping pointers to this stuff */ - char *args; #ifdef CONFIG_TRACEPOINTS - struct tracepoint * const *tracepoints_ptrs; unsigned int num_tracepoints; + struct tracepoint * const *tracepoints_ptrs; #endif #ifdef HAVE_JUMP_LABEL struct jump_entry *jump_entries; unsigned int num_jump_entries; #endif #ifdef CONFIG_TRACING - const char **trace_bprintk_fmt_start; unsigned int num_trace_bprintk_fmt; + const char **trace_bprintk_fmt_start; #endif #ifdef CONFIG_EVENT_TRACING struct ftrace_event_call **trace_events; unsigned int num_trace_events; #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD - unsigned long *ftrace_callsites; unsigned int num_ftrace_callsites; + unsigned long *ftrace_callsites; #endif #ifdef CONFIG_MODULE_UNLOAD -- cgit v1.2.2 From c5be0b2eb1ca05e0cd747f9c0ba552c6ee8827a0 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Thu, 19 May 2011 16:55:25 -0600 Subject: module: reorder kparam_array to remove alignment padding on 64 bit builds Reorder structure kparam_array to remove 8 bytes of alignment padding on 64 bit builds, dropping its size from 40 to 32 bytes. Also update the macro module_param_array_named to initialise the structure using its member names to allow it to be changed without touching all its call sites. 'git grep' finds module_param_array in 1037 places so this patch will save a small amount of data space across many modules. Signed-off-by: Richard Kennedy Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 07b41951e3fa..ddaae98c53f9 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -67,9 +67,9 @@ struct kparam_string { struct kparam_array { unsigned int max; + unsigned int elemsize; unsigned int *num; const struct kernel_param_ops *ops; - unsigned int elemsize; void *elem; }; @@ -371,8 +371,9 @@ extern int param_get_invbool(char *buffer, const struct kernel_param *kp); */ #define module_param_array_named(name, array, type, nump, perm) \ static const struct kparam_array __param_arr_##name \ - = { ARRAY_SIZE(array), nump, ¶m_ops_##type, \ - sizeof(array[0]), array }; \ + = { .max = ARRAY_SIZE(array), .num = nump, \ + .ops = ¶m_ops_##type, \ + .elemsize = sizeof(array[0]), .elem = array }; \ __module_param_call(MODULE_PARAM_PREFIX, name, \ ¶m_array_ops, \ .arr = &__param_arr_##name, \ -- cgit v1.2.2 From de4d8d53465483168d6a627d409ee2d09d8e3308 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 19 Apr 2011 21:49:58 +0200 Subject: module: each_symbol_section instead of each_symbol Instead of having a callback function for each symbol in the kernel, have a callback for each array of symbols. This eases the logic when we move to sorted symbols and binary search. Signed-off-by: Rusty Russell Signed-off-by: Alessio Igor Bogani --- include/linux/module.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index 65cc6cc73ca8..49f4ad0ddec2 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -477,8 +477,9 @@ const struct kernel_symbol *find_symbol(const char *name, bool warn); /* Walk the exported symbol table */ -bool each_symbol(bool (*fn)(const struct symsearch *arr, struct module *owner, - unsigned int symnum, void *data), void *data); +bool each_symbol_section(bool (*fn)(const struct symsearch *arr, + struct module *owner, + void *data), void *data); /* Returns 0 and fills in value, defined and namebuf, or -ERANGE if symnum out of range. */ -- cgit v1.2.2 From f02e8a6596b7dc9b2171f7ff5654039ef0950cdc Mon Sep 17 00:00:00 2001 From: Alessio Igor Bogani Date: Thu, 14 Apr 2011 14:59:39 +0200 Subject: module: Sort exported symbols This patch places every exported symbol in its own section (i.e. "___ksymtab+printk"). Thus the linker will use its SORT() directive to sort and finally merge all symbol in the right and final section (i.e. "__ksymtab"). The symbol prefixed archs use an underscore as prefix for symbols. To avoid collision we use a different character to create the temporary section names. This work was supported by a hardware donation from the CE Linux Forum. Signed-off-by: Alessio Igor Bogani Signed-off-by: Rusty Russell (folded in '+' fixup) Tested-by: Dirk Behme --- include/asm-generic/vmlinux.lds.h | 20 ++++++++++---------- include/linux/module.h | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index bd297a20ab98..b27445e00b6c 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -274,70 +274,70 @@ /* Kernel symbol table: Normal symbols */ \ __ksymtab : AT(ADDR(__ksymtab) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab) = .; \ - *(__ksymtab) \ + *(SORT(___ksymtab+*)) \ VMLINUX_SYMBOL(__stop___ksymtab) = .; \ } \ \ /* Kernel symbol table: GPL-only symbols */ \ __ksymtab_gpl : AT(ADDR(__ksymtab_gpl) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab_gpl) = .; \ - *(__ksymtab_gpl) \ + *(SORT(___ksymtab_gpl+*)) \ VMLINUX_SYMBOL(__stop___ksymtab_gpl) = .; \ } \ \ /* Kernel symbol table: Normal unused symbols */ \ __ksymtab_unused : AT(ADDR(__ksymtab_unused) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab_unused) = .; \ - *(__ksymtab_unused) \ + *(SORT(___ksymtab_unused+*)) \ VMLINUX_SYMBOL(__stop___ksymtab_unused) = .; \ } \ \ /* Kernel symbol table: GPL-only unused symbols */ \ __ksymtab_unused_gpl : AT(ADDR(__ksymtab_unused_gpl) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab_unused_gpl) = .; \ - *(__ksymtab_unused_gpl) \ + *(SORT(___ksymtab_unused_gpl+*)) \ VMLINUX_SYMBOL(__stop___ksymtab_unused_gpl) = .; \ } \ \ /* Kernel symbol table: GPL-future-only symbols */ \ __ksymtab_gpl_future : AT(ADDR(__ksymtab_gpl_future) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab_gpl_future) = .; \ - *(__ksymtab_gpl_future) \ + *(SORT(___ksymtab_gpl_future+*)) \ VMLINUX_SYMBOL(__stop___ksymtab_gpl_future) = .; \ } \ \ /* Kernel symbol table: Normal symbols */ \ __kcrctab : AT(ADDR(__kcrctab) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___kcrctab) = .; \ - *(__kcrctab) \ + *(SORT(___kcrctab+*)) \ VMLINUX_SYMBOL(__stop___kcrctab) = .; \ } \ \ /* Kernel symbol table: GPL-only symbols */ \ __kcrctab_gpl : AT(ADDR(__kcrctab_gpl) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___kcrctab_gpl) = .; \ - *(__kcrctab_gpl) \ + *(SORT(___kcrctab_gpl+*)) \ VMLINUX_SYMBOL(__stop___kcrctab_gpl) = .; \ } \ \ /* Kernel symbol table: Normal unused symbols */ \ __kcrctab_unused : AT(ADDR(__kcrctab_unused) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___kcrctab_unused) = .; \ - *(__kcrctab_unused) \ + *(SORT(___kcrctab_unused+*)) \ VMLINUX_SYMBOL(__stop___kcrctab_unused) = .; \ } \ \ /* Kernel symbol table: GPL-only unused symbols */ \ __kcrctab_unused_gpl : AT(ADDR(__kcrctab_unused_gpl) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___kcrctab_unused_gpl) = .; \ - *(__kcrctab_unused_gpl) \ + *(SORT(___kcrctab_unused_gpl+*)) \ VMLINUX_SYMBOL(__stop___kcrctab_unused_gpl) = .; \ } \ \ /* Kernel symbol table: GPL-future-only symbols */ \ __kcrctab_gpl_future : AT(ADDR(__kcrctab_gpl_future) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___kcrctab_gpl_future) = .; \ - *(__kcrctab_gpl_future) \ + *(SORT(___kcrctab_gpl_future+*)) \ VMLINUX_SYMBOL(__stop___kcrctab_gpl_future) = .; \ } \ \ diff --git a/include/linux/module.h b/include/linux/module.h index 49f4ad0ddec2..d9ca2d5dc6d0 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -224,7 +224,7 @@ struct module_use { extern void *__crc_##sym __attribute__((weak)); \ static const unsigned long __kcrctab_##sym \ __used \ - __attribute__((section("__kcrctab" sec), unused)) \ + __attribute__((section("___kcrctab" sec "+" #sym), unused)) \ = (unsigned long) &__crc_##sym; #else #define __CRC_SYMBOL(sym, sec) @@ -239,7 +239,7 @@ struct module_use { = MODULE_SYMBOL_PREFIX #sym; \ static const struct kernel_symbol __ksymtab_##sym \ __used \ - __attribute__((section("__ksymtab" sec), unused)) \ + __attribute__((section("___ksymtab" sec "+" #sym), unused)) \ = { (unsigned long)&sym, __kstrtab_##sym } #define EXPORT_SYMBOL(sym) \ -- cgit v1.2.2 From 1a94dc35bc5c166d89913dc01a49d27a3c21a455 Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Thu, 14 Apr 2011 20:00:19 +0200 Subject: lib: Add generic binary search function to the kernel. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There a large number hand-coded binary searches in the kernel (run "git grep search | grep binary" to find many of them). Since in my experience, hand-coding binary searches can be error-prone, it seems worth cleaning this up by providing a generic binary search function. This generic binary search implementation comes from Ksplice. It has the same basic API as the C library bsearch() function. Ksplice uses it in half a dozen places with 4 different comparison functions, and I think our code is substantially cleaner because of this. Signed-off-by: Tim Abbott Extra-bikeshedding-by: Alan Jenkins Extra-bikeshedding-by: André Goddard Rosa Extra-bikeshedding-by: Rusty Russell Signed-off-by: Rusty Russell Signed-off-by: Alessio Igor Bogani Signed-off-by: Rusty Russell --- include/linux/bsearch.h | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 include/linux/bsearch.h (limited to 'include') diff --git a/include/linux/bsearch.h b/include/linux/bsearch.h new file mode 100644 index 000000000000..90b1aa867224 --- /dev/null +++ b/include/linux/bsearch.h @@ -0,0 +1,9 @@ +#ifndef _LINUX_BSEARCH_H +#define _LINUX_BSEARCH_H + +#include + +void *bsearch(const void *key, const void *base, size_t num, size_t size, + int (*cmp)(const void *key, const void *elt)); + +#endif /* _LINUX_BSEARCH_H */ -- cgit v1.2.2 From d0f1fed29e6e73d9d17f4c91a5896a4ce3938d45 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 19 Apr 2011 12:43:45 +0100 Subject: Add a strtobool function matching semantics of existing in kernel equivalents This is a rename of the usr_strtobool proposal, which was a renamed, relocated and fixed version of previous kstrtobool RFC Signed-off-by: Jonathan Cameron Signed-off-by: Rusty Russell --- include/linux/string.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/string.h b/include/linux/string.h index a716ee2a8adb..a176db2f2c85 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -123,6 +123,7 @@ extern char **argv_split(gfp_t gfp, const char *str, int *argcp); extern void argv_free(char **argv); extern bool sysfs_streq(const char *s1, const char *s2); +extern int strtobool(const char *s, bool *res); #ifdef CONFIG_BINARY_PRINTF int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); -- cgit v1.2.2 From b3ae52b6b0335eba547221aad2cb3c50902e3d2d Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Tue, 10 May 2011 23:31:30 +0200 Subject: SSB: Change fallback sprom to callback mechanism. Some embedded devices like the Netgear WNDR3300 have two SSB based cards without an own sprom on the pci bus. We have to provide two different fallback sproms for these and this was not possible with the old solution. In the bcm47xx architecture the sprom data is stored in the nvram in the main flash storage. The architecture code will be able to fill the sprom with the stored data based on the bus where the device was found. The bcm63xx code should do the same thing as before, just using the new API. Acked-by: Michael Buesch Cc: netdev@vger.kernel.org Cc: linux-wireless@vger.kernel.org Cc: Florian Fainelli Signed-off-by: Hauke Mehrtens Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/2362/ Signed-off-by: Ralf Baechle --- include/linux/ssb/ssb.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 9659eff52ca2..045f72ab5dfd 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -404,7 +404,9 @@ extern bool ssb_is_sprom_available(struct ssb_bus *bus); /* Set a fallback SPROM. * See kdoc at the function definition for complete documentation. */ -extern int ssb_arch_set_fallback_sprom(const struct ssb_sprom *sprom); +extern int ssb_arch_register_fallback_sprom( + int (*sprom_callback)(struct ssb_bus *bus, + struct ssb_sprom *out)); /* Suspend a SSB bus. * Call this from the parent bus suspend routine. */ -- cgit v1.2.2 From 369db4c9524b7487faf1ff89646eee396c1363e1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 May 2011 21:33:40 +0000 Subject: clocksource: Restructure clocksource struct members Group the hot path members of struct clocksource together so we have a better cache line footprint. Make it cacheline aligned. Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: Eric Dumazet Reviewed-by: Ingo Molnar Link: http://lkml.kernel.org/r/%3C20110518210136.003081882%40linutronix.de%3E --- include/linux/clocksource.h | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 0fb0b7e79394..c918fbd33ee5 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -159,42 +159,38 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, */ struct clocksource { /* - * First part of structure is read mostly + * Hotpath data, fits in a single cache line when the + * clocksource itself is cacheline aligned. */ - const char *name; - struct list_head list; - int rating; cycle_t (*read)(struct clocksource *cs); - int (*enable)(struct clocksource *cs); - void (*disable)(struct clocksource *cs); + cycle_t cycle_last; cycle_t mask; u32 mult; u32 shift; u64 max_idle_ns; - unsigned long flags; - cycle_t (*vread)(void); - void (*suspend)(struct clocksource *cs); - void (*resume)(struct clocksource *cs); + #ifdef CONFIG_IA64 void *fsys_mmio; /* used by fsyscall asm code */ #define CLKSRC_FSYS_MMIO_SET(mmio, addr) ((mmio) = (addr)) #else #define CLKSRC_FSYS_MMIO_SET(mmio, addr) do { } while (0) #endif - - /* - * Second part is written at each timer interrupt - * Keep it in a different cache line to dirty no - * more than one cache line. - */ - cycle_t cycle_last ____cacheline_aligned_in_smp; + const char *name; + struct list_head list; + int rating; + cycle_t (*vread)(void); + int (*enable)(struct clocksource *cs); + void (*disable)(struct clocksource *cs); + unsigned long flags; + void (*suspend)(struct clocksource *cs); + void (*resume)(struct clocksource *cs); #ifdef CONFIG_CLOCKSOURCE_WATCHDOG /* Watchdog related data, used by the framework */ struct list_head wd_list; cycle_t wd_last; #endif -}; +} ____cacheline_aligned; /* * Clock source flags bits:: -- cgit v1.2.2 From 847b2f42be203f3cff7f243fdd3ee50c1e06c882 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 May 2011 21:33:41 +0000 Subject: clockevents: Restructure clock_event_device members Group the hot path members of struct clock_event_device together so we have a better cache line footprint. Make it cacheline aligned. Signed-off-by: Thomas Gleixner Cc: John Stultz Reviewed-by: Ingo Molnar Link: http://lkml.kernel.org/r/%3C20110518210136.223607682%40linutronix.de%3E --- include/linux/clockchips.h | 45 +++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index fc53492b6ad7..9466eebc8e19 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -56,46 +56,47 @@ enum clock_event_nofitiers { /** * struct clock_event_device - clock event device descriptor - * @name: ptr to clock event name - * @features: features + * @event_handler: Assigned by the framework to be called by the low + * level handler of the event source + * @set_next_event: set next event function + * @next_event: local storage for the next event in oneshot mode * @max_delta_ns: maximum delta value in ns * @min_delta_ns: minimum delta value in ns * @mult: nanosecond to cycles multiplier * @shift: nanoseconds to cycles divisor (power of two) + * @mode: operating mode assigned by the management code + * @features: features + * @retries: number of forced programming retries + * @set_mode: set mode function + * @broadcast: function to broadcast events + * @name: ptr to clock event name * @rating: variable to rate clock event devices * @irq: IRQ number (only for non CPU local devices) * @cpumask: cpumask to indicate for which CPUs this device works - * @set_next_event: set next event function - * @set_mode: set mode function - * @event_handler: Assigned by the framework to be called by the low - * level handler of the event source - * @broadcast: function to broadcast events * @list: list head for the management code - * @mode: operating mode assigned by the management code - * @next_event: local storage for the next event in oneshot mode - * @retries: number of forced programming retries */ struct clock_event_device { - const char *name; - unsigned int features; + void (*event_handler)(struct clock_event_device *); + int (*set_next_event)(unsigned long evt, + struct clock_event_device *); + ktime_t next_event; u64 max_delta_ns; u64 min_delta_ns; u32 mult; u32 shift; + enum clock_event_mode mode; + unsigned int features; + unsigned long retries; + + void (*broadcast)(const struct cpumask *mask); + void (*set_mode)(enum clock_event_mode mode, + struct clock_event_device *); + const char *name; int rating; int irq; const struct cpumask *cpumask; - int (*set_next_event)(unsigned long evt, - struct clock_event_device *); - void (*set_mode)(enum clock_event_mode mode, - struct clock_event_device *); - void (*event_handler)(struct clock_event_device *); - void (*broadcast)(const struct cpumask *mask); struct list_head list; - enum clock_event_mode mode; - ktime_t next_event; - unsigned long retries; -}; +} ____cacheline_aligned; /* * Calculate a multiplication factor for scaled math, which is used to convert -- cgit v1.2.2 From 57f0fcbe1dea8a36c9d1673086326059991c5f81 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 May 2011 21:33:41 +0000 Subject: clockevents: Provide combined configure and register function All clockevent devices have the same open coded initialization functions. Provide an interface which does all necessary initialization in the core code. Signed-off-by: Thomas Gleixner Cc: John Stultz Reviewed-by: Ingo Molnar Link: http://lkml.kernel.org/r/%3C20110518210136.331975870%40linutronix.de%3E --- include/linux/clockchips.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 9466eebc8e19..80acc79e0dc5 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -69,6 +69,8 @@ enum clock_event_nofitiers { * @retries: number of forced programming retries * @set_mode: set mode function * @broadcast: function to broadcast events + * @min_delta_ticks: minimum delta value in ticks stored for reconfiguration + * @max_delta_ticks: maximum delta value in ticks stored for reconfiguration * @name: ptr to clock event name * @rating: variable to rate clock event devices * @irq: IRQ number (only for non CPU local devices) @@ -91,6 +93,9 @@ struct clock_event_device { void (*broadcast)(const struct cpumask *mask); void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *); + unsigned long min_delta_ticks; + unsigned long max_delta_ticks; + const char *name; int rating; int irq; @@ -123,6 +128,10 @@ extern u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt); extern void clockevents_register_device(struct clock_event_device *dev); +extern void clockevents_config_and_register(struct clock_event_device *dev, + u32 freq, unsigned long min_delta, + unsigned long max_delta); + extern void clockevents_exchange_device(struct clock_event_device *old, struct clock_event_device *new); extern void clockevents_set_mode(struct clock_event_device *dev, -- cgit v1.2.2 From 80b816b736cfa5b9582279127099b20a479ab7d9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 May 2011 21:33:42 +0000 Subject: clockevents: Provide interface to reconfigure an active clock event device Some ARM SoCs have clock event devices which have their frequency modified due to frequency scaling. Provide an interface which allows to reconfigure an active device. After reconfiguration reprogram the current pending event. Signed-off-by: Thomas Gleixner Cc: LAK Cc: John Stultz Acked-by: Linus Walleij Reviewed-by: Ingo Molnar Link: http://lkml.kernel.org/r/%3C20110518210136.437459958%40linutronix.de%3E --- include/linux/clockchips.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 80acc79e0dc5..d6733e27af34 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -132,6 +132,8 @@ extern void clockevents_config_and_register(struct clock_event_device *dev, u32 freq, unsigned long min_delta, unsigned long max_delta); +extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq); + extern void clockevents_exchange_device(struct clock_event_device *old, struct clock_event_device *new); extern void clockevents_set_mode(struct clock_event_device *dev, -- cgit v1.2.2 From 75d65a425c0163d3ec476ddc12b51087217a070c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 19 May 2011 13:50:07 -0700 Subject: hlist: remove software prefetching in hlist iterators They not only increase the code footprint, they actually make things slower rather than faster. On internationally acclaimed benchmarks ("make -j16" on an already fully built kernel source tree) the hlist prefetching slows down the build by up to 1%. (Almost all of it comes from hlist_for_each_entry_rcu() as used by avc_has_perm_noaudit(), which is very hot due to all the pathname lookups to see if there is anything to do). The cause seems to be two-fold: - on at least some Intel cores, prefetch(NULL) ends up with some microarchitectural stall due to the TLB miss that it incurs. The hlist case triggers this very commonly, since the NULL pointer is the last entry in the list. - the prefetch appears to cause more D$ activity, probably because it prefetches hash list entries that are never actually used (because we ended the search early due to a hit). Regardless, the numbers clearly say that the implicit prefetching is simply a bad idea. If some _particular_ user of the hlist iterators wants to prefetch the next list entry, they can do so themselves explicitly, rather than depend on all list iterators doing so implicitly. Acked-by: Ingo Molnar Acked-by: David S. Miller Cc: linux-arch@vger.kernel.org Signed-off-by: Linus Torvalds --- include/linux/list.h | 9 ++++----- include/linux/rculist.h | 10 +++++----- 2 files changed, 9 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/list.h b/include/linux/list.h index 3a54266a1e85..9ac11148e037 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -664,8 +664,7 @@ static inline void hlist_move_list(struct hlist_head *old, #define hlist_entry(ptr, type, member) container_of(ptr,type,member) #define hlist_for_each(pos, head) \ - for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \ - pos = pos->next) + for (pos = (head)->first; pos ; pos = pos->next) #define hlist_for_each_safe(pos, n, head) \ for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \ @@ -680,7 +679,7 @@ static inline void hlist_move_list(struct hlist_head *old, */ #define hlist_for_each_entry(tpos, pos, head, member) \ for (pos = (head)->first; \ - pos && ({ prefetch(pos->next); 1;}) && \ + pos && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ pos = pos->next) @@ -692,7 +691,7 @@ static inline void hlist_move_list(struct hlist_head *old, */ #define hlist_for_each_entry_continue(tpos, pos, member) \ for (pos = (pos)->next; \ - pos && ({ prefetch(pos->next); 1;}) && \ + pos && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ pos = pos->next) @@ -703,7 +702,7 @@ static inline void hlist_move_list(struct hlist_head *old, * @member: the name of the hlist_node within the struct. */ #define hlist_for_each_entry_from(tpos, pos, member) \ - for (; pos && ({ prefetch(pos->next); 1;}) && \ + for (; pos && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ pos = pos->next) diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 2dea94fc4402..900a97a44769 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -427,7 +427,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, #define __hlist_for_each_rcu(pos, head) \ for (pos = rcu_dereference(hlist_first_rcu(head)); \ - pos && ({ prefetch(pos->next); 1; }); \ + pos; \ pos = rcu_dereference(hlist_next_rcu(pos))) /** @@ -443,7 +443,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, */ #define hlist_for_each_entry_rcu(tpos, pos, head, member) \ for (pos = rcu_dereference_raw(hlist_first_rcu(head)); \ - pos && ({ prefetch(pos->next); 1; }) && \ + pos && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference_raw(hlist_next_rcu(pos))) @@ -460,7 +460,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, */ #define hlist_for_each_entry_rcu_bh(tpos, pos, head, member) \ for (pos = rcu_dereference_bh((head)->first); \ - pos && ({ prefetch(pos->next); 1; }) && \ + pos && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference_bh(pos->next)) @@ -472,7 +472,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, */ #define hlist_for_each_entry_continue_rcu(tpos, pos, member) \ for (pos = rcu_dereference((pos)->next); \ - pos && ({ prefetch(pos->next); 1; }) && \ + pos && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference(pos->next)) @@ -484,7 +484,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, */ #define hlist_for_each_entry_continue_rcu_bh(tpos, pos, member) \ for (pos = rcu_dereference_bh((pos)->next); \ - pos && ({ prefetch(pos->next); 1; }) && \ + pos && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference_bh(pos->next)) -- cgit v1.2.2 From e66eed651fd18a961f11cda62f3b5286c8cc4f9f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 19 May 2011 14:15:29 -0700 Subject: list: remove prefetching from regular list iterators This is removes the use of software prefetching from the regular list iterators. We don't want it. If you do want to prefetch in some iterator of yours, go right ahead. Just don't expect the iterator to do it, since normally the downsides are bigger than the upsides. It also replaces with , because the use of LIST_POISON ends up needing it. is sadly not self-contained, and including prefetch.h just happened to hide that. Suggested by David Miller (networking has a lot of regular lists that are often empty or a single entry, and prefetching is not going to do anything but add useless instructions). Acked-by: Ingo Molnar Acked-by: David S. Miller Cc: linux-arch@vger.kernel.org Signed-off-by: Linus Torvalds --- include/linux/list.h | 26 +++++++++++--------------- include/linux/rculist.h | 6 +++--- 2 files changed, 14 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/list.h b/include/linux/list.h index 9ac11148e037..cc6d2aa6b415 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include /* * Simple doubly linked list implementation. @@ -367,18 +367,15 @@ static inline void list_splice_tail_init(struct list_head *list, * @head: the head for your list. */ #define list_for_each(pos, head) \ - for (pos = (head)->next; prefetch(pos->next), pos != (head); \ - pos = pos->next) + for (pos = (head)->next; pos != (head); pos = pos->next) /** * __list_for_each - iterate over a list * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. * - * This variant differs from list_for_each() in that it's the - * simplest possible list iteration code, no prefetching is done. - * Use this for code that knows the list to be very short (empty - * or 1 entry) most of the time. + * This variant doesn't differ from list_for_each() any more. + * We don't do prefetching in either case. */ #define __list_for_each(pos, head) \ for (pos = (head)->next; pos != (head); pos = pos->next) @@ -389,8 +386,7 @@ static inline void list_splice_tail_init(struct list_head *list, * @head: the head for your list. */ #define list_for_each_prev(pos, head) \ - for (pos = (head)->prev; prefetch(pos->prev), pos != (head); \ - pos = pos->prev) + for (pos = (head)->prev; pos != (head); pos = pos->prev) /** * list_for_each_safe - iterate over a list safe against removal of list entry @@ -410,7 +406,7 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_prev_safe(pos, n, head) \ for (pos = (head)->prev, n = pos->prev; \ - prefetch(pos->prev), pos != (head); \ + pos != (head); \ pos = n, n = pos->prev) /** @@ -421,7 +417,7 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_entry(pos, head, member) \ for (pos = list_entry((head)->next, typeof(*pos), member); \ - prefetch(pos->member.next), &pos->member != (head); \ + &pos->member != (head); \ pos = list_entry(pos->member.next, typeof(*pos), member)) /** @@ -432,7 +428,7 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_entry_reverse(pos, head, member) \ for (pos = list_entry((head)->prev, typeof(*pos), member); \ - prefetch(pos->member.prev), &pos->member != (head); \ + &pos->member != (head); \ pos = list_entry(pos->member.prev, typeof(*pos), member)) /** @@ -457,7 +453,7 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_entry_continue(pos, head, member) \ for (pos = list_entry(pos->member.next, typeof(*pos), member); \ - prefetch(pos->member.next), &pos->member != (head); \ + &pos->member != (head); \ pos = list_entry(pos->member.next, typeof(*pos), member)) /** @@ -471,7 +467,7 @@ static inline void list_splice_tail_init(struct list_head *list, */ #define list_for_each_entry_continue_reverse(pos, head, member) \ for (pos = list_entry(pos->member.prev, typeof(*pos), member); \ - prefetch(pos->member.prev), &pos->member != (head); \ + &pos->member != (head); \ pos = list_entry(pos->member.prev, typeof(*pos), member)) /** @@ -483,7 +479,7 @@ static inline void list_splice_tail_init(struct list_head *list, * Iterate over list of given type, continuing from current position. */ #define list_for_each_entry_from(pos, head, member) \ - for (; prefetch(pos->member.next), &pos->member != (head); \ + for (; &pos->member != (head); \ pos = list_entry(pos->member.next, typeof(*pos), member)) /** diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 900a97a44769..e3beb315517a 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -253,7 +253,7 @@ static inline void list_splice_init_rcu(struct list_head *list, */ #define list_for_each_entry_rcu(pos, head, member) \ for (pos = list_entry_rcu((head)->next, typeof(*pos), member); \ - prefetch(pos->member.next), &pos->member != (head); \ + &pos->member != (head); \ pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) @@ -270,7 +270,7 @@ static inline void list_splice_init_rcu(struct list_head *list, */ #define list_for_each_continue_rcu(pos, head) \ for ((pos) = rcu_dereference_raw(list_next_rcu(pos)); \ - prefetch((pos)->next), (pos) != (head); \ + (pos) != (head); \ (pos) = rcu_dereference_raw(list_next_rcu(pos))) /** @@ -284,7 +284,7 @@ static inline void list_splice_init_rcu(struct list_head *list, */ #define list_for_each_entry_continue_rcu(pos, head, member) \ for (pos = list_entry_rcu(pos->member.next, typeof(*pos), member); \ - prefetch(pos->member.next), &pos->member != (head); \ + &pos->member != (head); \ pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) /** -- cgit v1.2.2 From 1477fcc290b3d5c2614bde98bf3b1154c538860d Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 20 May 2011 11:11:53 +1000 Subject: signal.h need a definition of struct task_struct This fixes these build errors on powerpc: In file included from arch/powerpc/mm/fault.c:18: include/linux/signal.h:239: error: 'struct task_struct' declared inside parameter list include/linux/signal.h:239: error: its scope is only this definition or declaration, which is probably not what you want include/linux/signal.h:240: error: 'struct task_struct' declared inside parameter list .. Exposed by commit e66eed651fd1 ("list: remove prefetching from regular list iterators"), which removed the include of from . Without that, linux/signal.h no longer accidentally got the declaration of 'struct task_struct'. Fix by properly declaring the struct, rather than introducing any new header file dependency. Signed-off-by: Stephen Rothwell Signed-off-by: Linus Torvalds --- include/linux/signal.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/signal.h b/include/linux/signal.h index fcd2b14b1932..29a68ac7af83 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -7,6 +7,8 @@ #ifdef __KERNEL__ #include +struct task_struct; + /* for sysctl */ extern int print_fatal_signals; /* -- cgit v1.2.2