From 6859a8402945cf1d74af75a2e1aa4e327a506ab4 Mon Sep 17 00:00:00 2001 From: Alan Mayer Date: Wed, 26 Mar 2008 16:11:31 -0500 Subject: x86: resize NR_IRQS for large machines On machines with very large numbers of cpus, tables that are dimensioned by NR_IRQS get very large, especially the irq_desc table. They are also very sparsely used. When the cpu count is > MAX_IO_APICS, use MAX_IO_APICS to set NR_IRQS, otherwise use NR_CPUS. Signed-off-by: Alan Mayer Reviewed-by: Christoph Lameter Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/kernel_stat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index e8ffce898bf9..cf9f40a91c9c 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -1,11 +1,11 @@ #ifndef _LINUX_KERNEL_STAT_H #define _LINUX_KERNEL_STAT_H -#include #include #include #include #include +#include #include /* -- cgit v1.2.2 From 988f7b5789ccf5cfed14c72e28573a49f0cb4809 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Tue, 18 Mar 2008 17:00:22 -0700 Subject: x86: PAT export resource_wc in pci sysfs For the ranges with IORESOURCE_PREFETCH, export a new resource_wc interface in pci /sysfs along with resource (which is uncached). Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Acked-by: Jesse Barnes Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 509159bcd4e7..d18b1dd49fab 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -206,6 +206,7 @@ struct pci_dev { struct bin_attribute *rom_attr; /* attribute descriptor for sysfs ROM entry */ int rom_attr_enabled; /* has display of the rom attribute been enabled? */ struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */ + struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */ #ifdef CONFIG_PCI_MSI struct list_head msi_list; #endif -- cgit v1.2.2 From 8b09dee67f484e9b42114b1a1f068e080fd7aa56 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:05 +0200 Subject: rcupreempt: remove duplicate prototypes rcu_batches_completed and rcu_patches_completed_bh are both declared in rcuclassic.h and rcupreempt.h. This patch removes the extra prototypes for them from rcupdate.h. rcu_batches_completed_bh is defined as a static inline in the rcupreempt.h header file. Trying to export this as EXPORT_SYMBOL_GPL causes linking problems with the powerpc linker. There's no need to export a static inlined function. Modules must be compiled with the same type of RCU implementation as the kernel they are for. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d42dbec06083..ec2fc5b32646 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -224,8 +224,6 @@ extern void call_rcu_bh(struct rcu_head *head, /* Exported common interfaces */ extern void synchronize_rcu(void); extern void rcu_barrier(void); -extern long rcu_batches_completed(void); -extern long rcu_batches_completed_bh(void); /* Internal to kernel */ extern void rcu_init(void); -- cgit v1.2.2 From 4446a36ff8c74ac3b32feb009b651048e129c6af Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 12 May 2008 21:21:05 +0200 Subject: rcu: add call_rcu_sched() Fourth cut of patch to provide the call_rcu_sched(). This is again to synchronize_sched() as call_rcu() is to synchronize_rcu(). Should be fine for experimental and -rt use, but not ready for inclusion. With some luck, I will be able to tell Andrew to come out of hiding on the next round. Passes multi-day rcutorture sessions with concurrent CPU hotplugging. Fixes since the first version include a bug that could result in indefinite blocking (spotted by Gautham Shenoy), better resiliency against CPU-hotplug operations, and other minor fixes. Fixes since the second version include reworking grace-period detection to avoid deadlocks that could happen when running concurrently with CPU hotplug, adding Mathieu's fix to avoid the softlockup messages, as well as Mathieu's fix to allow use earlier in boot. Fixes since the third version include a wrong-CPU bug spotted by Andrew, getting rid of the obsolete synchronize_kernel API that somehow snuck back in, merging spin_unlock() and local_irq_restore() in a few places, commenting the code that checks for quiescent states based on interrupting from user-mode execution or the idle loop, removing some inline attributes, and some code-style changes. Known/suspected shortcomings: o I still do not entirely trust the sleep/wakeup logic. Next step will be to use a private snapshot of the CPU online mask in rcu_sched_grace_period() -- if the CPU wasn't there at the start of the grace period, we don't need to hear from it. And the bit about accounting for changes in online CPUs inside of rcu_sched_grace_period() is ugly anyway. o It might be good for rcu_sched_grace_period() to invoke resched_cpu() when a given CPU wasn't responding quickly, but resched_cpu() is declared static... This patch also fixes a long-standing bug in the earlier preemptable-RCU implementation of synchronize_rcu() that could result in loss of concurrent external changes to a task's CPU affinity mask. I still cannot remember who reported this... Signed-off-by: Paul E. McKenney Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/rcuclassic.h | 3 +++ include/linux/rcupdate.h | 22 ++++++++++++++++++++++ include/linux/rcupreempt.h | 42 ++++++++++++++++++++++++++++++++++++------ 3 files changed, 61 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index b3aa05baab8a..8c774905dcfe 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -151,7 +151,10 @@ extern struct lockdep_map rcu_lock_map; #define __synchronize_sched() synchronize_rcu() +#define call_rcu_sched(head, func) call_rcu(head, func) + extern void __rcu_init(void); +#define rcu_init_sched() do { } while (0) extern void rcu_check_callbacks(int cpu, int user); extern void rcu_restart_cpu(int cpu); diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index ec2fc5b32646..411969cb5243 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -40,6 +40,7 @@ #include #include #include +#include /** * struct rcu_head - callback structure for use with RCU @@ -168,6 +169,27 @@ struct rcu_head { (p) = (v); \ }) +/* Infrastructure to implement the synchronize_() primitives. */ + +struct rcu_synchronize { + struct rcu_head head; + struct completion completion; +}; + +extern void wakeme_after_rcu(struct rcu_head *head); + +#define synchronize_rcu_xxx(name, func) \ +void name(void) \ +{ \ + struct rcu_synchronize rcu; \ + \ + init_completion(&rcu.completion); \ + /* Will wake me after RCU finished. */ \ + func(&rcu.head, wakeme_after_rcu); \ + /* Wait for it. */ \ + wait_for_completion(&rcu.completion); \ +} + /** * synchronize_sched - block until all CPUs have exited any non-preemptive * kernel code sequences. diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index 8a05c7e20bc4..f04b64eca636 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -40,10 +40,39 @@ #include #include -#define rcu_qsctr_inc(cpu) +struct rcu_dyntick_sched { + int dynticks; + int dynticks_snap; + int sched_qs; + int sched_qs_snap; + int sched_dynticks_snap; +}; + +DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched); + +static inline void rcu_qsctr_inc(int cpu) +{ + struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); + + rdssp->sched_qs++; +} #define rcu_bh_qsctr_inc(cpu) #define call_rcu_bh(head, rcu) call_rcu(head, rcu) +/** + * call_rcu_sched - Queue RCU callback for invocation after sched grace period. + * @head: structure to be used for queueing the RCU updates. + * @func: actual update function to be invoked after the grace period + * + * The update function will be invoked some time after a full + * synchronize_sched()-style grace period elapses, in other words after + * all currently executing preempt-disabled sections of code (including + * hardirq handlers, NMI handlers, and local_irq_save() blocks) have + * completed. + */ +extern void call_rcu_sched(struct rcu_head *head, + void (*func)(struct rcu_head *head)); + extern void __rcu_read_lock(void) __acquires(RCU); extern void __rcu_read_unlock(void) __releases(RCU); extern int rcu_pending(int cpu); @@ -55,6 +84,7 @@ extern int rcu_needs_cpu(int cpu); extern void __synchronize_sched(void); extern void __rcu_init(void); +extern void rcu_init_sched(void); extern void rcu_check_callbacks(int cpu, int user); extern void rcu_restart_cpu(int cpu); extern long rcu_batches_completed(void); @@ -81,20 +111,20 @@ extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu); struct softirq_action; #ifdef CONFIG_NO_HZ -DECLARE_PER_CPU(long, dynticks_progress_counter); +DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched); static inline void rcu_enter_nohz(void) { smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ - __get_cpu_var(dynticks_progress_counter)++; - WARN_ON(__get_cpu_var(dynticks_progress_counter) & 0x1); + __get_cpu_var(rcu_dyntick_sched).dynticks++; + WARN_ON(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1); } static inline void rcu_exit_nohz(void) { - __get_cpu_var(dynticks_progress_counter)++; smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ - WARN_ON(!(__get_cpu_var(dynticks_progress_counter) & 0x1)); + __get_cpu_var(rcu_dyntick_sched).dynticks++; + WARN_ON(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1)); } #else /* CONFIG_NO_HZ */ -- cgit v1.2.2 From 70f12f848d3e981479b4f6f751e73c14f7c13e5b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 12 May 2008 21:21:05 +0200 Subject: rcu: add rcu_barrier_sched() and rcu_barrier_bh() Add rcu_barrier_sched() and rcu_barrier_bh(). With these in place, rcutorture no longer gives the occasional oops when repeatedly starting and stopping torturing rcu_bh. Also adds the API needed to flush out pre-existing call_rcu_sched() callbacks. Signed-off-by: Paul E. McKenney Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/rcupdate.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 411969cb5243..e8b4039cfb2f 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -246,6 +246,8 @@ extern void call_rcu_bh(struct rcu_head *head, /* Exported common interfaces */ extern void synchronize_rcu(void); extern void rcu_barrier(void); +extern void rcu_barrier_bh(void); +extern void rcu_barrier_sched(void); /* Internal to kernel */ extern void rcu_init(void); -- cgit v1.2.2 From 82524746c27fa418c250a56dd7606b9d3fc79826 Mon Sep 17 00:00:00 2001 From: Franck Bui-Huu Date: Mon, 12 May 2008 21:21:05 +0200 Subject: rcu: split list.h and move rcu-protected lists into rculist.h Move rcu-protected lists from list.h into a new header file rculist.h. This is done because list are a very used primitive structure all over the kernel and it's currently impossible to include other header files in this list.h without creating some circular dependencies. For example, list.h implements rcu-protected list and uses rcu_dereference() without including rcupdate.h. It actually compiles because users of rcu_dereference() are macros. Others RCU functions could be used too but aren't probably because of this. Therefore this patch creates rculist.h which includes rcupdates without to many changes/troubles. Signed-off-by: Franck Bui-Huu Acked-by: Paul E. McKenney Acked-by: Josh Triplett Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/dcache.h | 1 + include/linux/list.h | 367 -------------------------------------------- include/linux/rculist.h | 396 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 397 insertions(+), 367 deletions(-) create mode 100644 include/linux/rculist.h (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 2a6639407c80..1f5cebf10a23 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -3,6 +3,7 @@ #include #include +#include #include #include #include diff --git a/include/linux/list.h b/include/linux/list.h index 08cf4f651889..139ec41d9c2e 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -84,65 +84,6 @@ static inline void list_add_tail(struct list_head *new, struct list_head *head) __list_add(new, head->prev, head); } -/* - * Insert a new entry between two known consecutive entries. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -static inline void __list_add_rcu(struct list_head * new, - struct list_head * prev, struct list_head * next) -{ - new->next = next; - new->prev = prev; - smp_wmb(); - next->prev = new; - prev->next = new; -} - -/** - * list_add_rcu - add a new entry to rcu-protected list - * @new: new entry to be added - * @head: list head to add it after - * - * Insert a new entry after the specified head. - * This is good for implementing stacks. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as list_add_rcu() - * or list_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * list_for_each_entry_rcu(). - */ -static inline void list_add_rcu(struct list_head *new, struct list_head *head) -{ - __list_add_rcu(new, head, head->next); -} - -/** - * list_add_tail_rcu - add a new entry to rcu-protected list - * @new: new entry to be added - * @head: list head to add it before - * - * Insert a new entry before the specified head. - * This is useful for implementing queues. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as list_add_tail_rcu() - * or list_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * list_for_each_entry_rcu(). - */ -static inline void list_add_tail_rcu(struct list_head *new, - struct list_head *head) -{ - __list_add_rcu(new, head->prev, head); -} - /* * Delete a list entry by making the prev/next entries * point to each other. @@ -173,36 +114,6 @@ static inline void list_del(struct list_head *entry) extern void list_del(struct list_head *entry); #endif -/** - * list_del_rcu - deletes entry from list without re-initialization - * @entry: the element to delete from the list. - * - * Note: list_empty() on entry does not return true after this, - * the entry is in an undefined state. It is useful for RCU based - * lockfree traversal. - * - * In particular, it means that we can not poison the forward - * pointers that may still be used for walking the list. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as list_del_rcu() - * or list_add_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * list_for_each_entry_rcu(). - * - * Note that the caller is not permitted to immediately free - * the newly deleted entry. Instead, either synchronize_rcu() - * or call_rcu() must be used to defer freeing until an RCU - * grace period has elapsed. - */ -static inline void list_del_rcu(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); - entry->prev = LIST_POISON2; -} - /** * list_replace - replace old entry by new one * @old : the element to be replaced @@ -226,25 +137,6 @@ static inline void list_replace_init(struct list_head *old, INIT_LIST_HEAD(old); } -/** - * list_replace_rcu - replace old entry by new one - * @old : the element to be replaced - * @new : the new element to insert - * - * The @old entry will be replaced with the @new entry atomically. - * Note: @old should not be empty. - */ -static inline void list_replace_rcu(struct list_head *old, - struct list_head *new) -{ - new->next = old->next; - new->prev = old->prev; - smp_wmb(); - new->next->prev = new; - new->prev->next = new; - old->prev = LIST_POISON2; -} - /** * list_del_init - deletes entry from list and reinitialize it. * @entry: the element to delete from the list. @@ -368,62 +260,6 @@ static inline void list_splice_init(struct list_head *list, } } -/** - * list_splice_init_rcu - splice an RCU-protected list into an existing list. - * @list: the RCU-protected list to splice - * @head: the place in the list to splice the first list into - * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ... - * - * @head can be RCU-read traversed concurrently with this function. - * - * Note that this function blocks. - * - * Important note: the caller must take whatever action is necessary to - * prevent any other updates to @head. In principle, it is possible - * to modify the list as soon as sync() begins execution. - * If this sort of thing becomes necessary, an alternative version - * based on call_rcu() could be created. But only if -really- - * needed -- there is no shortage of RCU API members. - */ -static inline void list_splice_init_rcu(struct list_head *list, - struct list_head *head, - void (*sync)(void)) -{ - struct list_head *first = list->next; - struct list_head *last = list->prev; - struct list_head *at = head->next; - - if (list_empty(head)) - return; - - /* "first" and "last" tracking list, so initialize it. */ - - INIT_LIST_HEAD(list); - - /* - * At this point, the list body still points to the source list. - * Wait for any readers to finish using the list before splicing - * the list body into the new list. Any new readers will see - * an empty list. - */ - - sync(); - - /* - * Readers are finished with the source list, so perform splice. - * The order is important if the new list is global and accessible - * to concurrent RCU readers. Note that RCU readers are not - * permitted to traverse the prev pointers without excluding - * this function. - */ - - last->next = at; - smp_wmb(); - head->next = first; - first->prev = head; - at->prev = last; -} - /** * list_entry - get the struct for this entry * @ptr: the &struct list_head pointer. @@ -629,57 +465,6 @@ static inline void list_splice_init_rcu(struct list_head *list, &pos->member != (head); \ pos = n, n = list_entry(n->member.prev, typeof(*n), member)) -/** - * list_for_each_rcu - iterate over an rcu-protected list - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as list_add_rcu() - * as long as the traversal is guarded by rcu_read_lock(). - */ -#define list_for_each_rcu(pos, head) \ - for (pos = rcu_dereference((head)->next); \ - prefetch(pos->next), pos != (head); \ - pos = rcu_dereference(pos->next)) - -#define __list_for_each_rcu(pos, head) \ - for (pos = rcu_dereference((head)->next); \ - pos != (head); \ - pos = rcu_dereference(pos->next)) - -/** - * list_for_each_entry_rcu - iterate over rcu list of given type - * @pos: the type * to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as list_add_rcu() - * as long as the traversal is guarded by rcu_read_lock(). - */ -#define list_for_each_entry_rcu(pos, head, member) \ - for (pos = list_entry(rcu_dereference((head)->next), typeof(*pos), member); \ - prefetch(pos->member.next), &pos->member != (head); \ - pos = list_entry(rcu_dereference(pos->member.next), typeof(*pos), member)) - - -/** - * list_for_each_continue_rcu - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - * - * Iterate over an rcu-protected list, continuing after current point. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as list_add_rcu() - * as long as the traversal is guarded by rcu_read_lock(). - */ -#define list_for_each_continue_rcu(pos, head) \ - for ((pos) = rcu_dereference((pos)->next); \ - prefetch((pos)->next), (pos) != (head); \ - (pos) = rcu_dereference((pos)->next)) - /* * Double linked lists with a single pointer list head. * Mostly useful for hash tables where the two pointer list head is @@ -730,31 +515,6 @@ static inline void hlist_del(struct hlist_node *n) n->pprev = LIST_POISON2; } -/** - * hlist_del_rcu - deletes entry from hash list without re-initialization - * @n: the element to delete from the hash list. - * - * Note: list_unhashed() on entry does not return true after this, - * the entry is in an undefined state. It is useful for RCU based - * lockfree traversal. - * - * In particular, it means that we can not poison the forward - * pointers that may still be used for walking the hash list. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as hlist_add_head_rcu() - * or hlist_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * hlist_for_each_entry(). - */ -static inline void hlist_del_rcu(struct hlist_node *n) -{ - __hlist_del(n); - n->pprev = LIST_POISON2; -} - static inline void hlist_del_init(struct hlist_node *n) { if (!hlist_unhashed(n)) { @@ -763,27 +523,6 @@ static inline void hlist_del_init(struct hlist_node *n) } } -/** - * hlist_replace_rcu - replace old entry by new one - * @old : the element to be replaced - * @new : the new element to insert - * - * The @old entry will be replaced with the @new entry atomically. - */ -static inline void hlist_replace_rcu(struct hlist_node *old, - struct hlist_node *new) -{ - struct hlist_node *next = old->next; - - new->next = next; - new->pprev = old->pprev; - smp_wmb(); - if (next) - new->next->pprev = &new->next; - *new->pprev = new; - old->pprev = LIST_POISON2; -} - static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) { struct hlist_node *first = h->first; @@ -794,38 +533,6 @@ static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) n->pprev = &h->first; } - -/** - * hlist_add_head_rcu - * @n: the element to add to the hash list. - * @h: the list to add to. - * - * Description: - * Adds the specified element to the specified hlist, - * while permitting racing traversals. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as hlist_add_head_rcu() - * or hlist_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * hlist_for_each_entry_rcu(), used to prevent memory-consistency - * problems on Alpha CPUs. Regardless of the type of CPU, the - * list-traversal primitive must be guarded by rcu_read_lock(). - */ -static inline void hlist_add_head_rcu(struct hlist_node *n, - struct hlist_head *h) -{ - struct hlist_node *first = h->first; - n->next = first; - n->pprev = &h->first; - smp_wmb(); - if (first) - first->pprev = &n->next; - h->first = n; -} - /* next must be != NULL */ static inline void hlist_add_before(struct hlist_node *n, struct hlist_node *next) @@ -847,63 +554,6 @@ static inline void hlist_add_after(struct hlist_node *n, next->next->pprev = &next->next; } -/** - * hlist_add_before_rcu - * @n: the new element to add to the hash list. - * @next: the existing element to add the new element before. - * - * Description: - * Adds the specified element to the specified hlist - * before the specified node while permitting racing traversals. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as hlist_add_head_rcu() - * or hlist_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * hlist_for_each_entry_rcu(), used to prevent memory-consistency - * problems on Alpha CPUs. - */ -static inline void hlist_add_before_rcu(struct hlist_node *n, - struct hlist_node *next) -{ - n->pprev = next->pprev; - n->next = next; - smp_wmb(); - next->pprev = &n->next; - *(n->pprev) = n; -} - -/** - * hlist_add_after_rcu - * @prev: the existing element to add the new element after. - * @n: the new element to add to the hash list. - * - * Description: - * Adds the specified element to the specified hlist - * after the specified node while permitting racing traversals. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as hlist_add_head_rcu() - * or hlist_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * hlist_for_each_entry_rcu(), used to prevent memory-consistency - * problems on Alpha CPUs. - */ -static inline void hlist_add_after_rcu(struct hlist_node *prev, - struct hlist_node *n) -{ - n->next = prev->next; - n->pprev = &prev->next; - smp_wmb(); - prev->next = n; - if (n->next) - n->next->pprev = &n->next; -} - #define hlist_entry(ptr, type, member) container_of(ptr,type,member) #define hlist_for_each(pos, head) \ @@ -964,21 +614,4 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ pos = n) -/** - * hlist_for_each_entry_rcu - iterate over rcu list of given type - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the hlist_node within the struct. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as hlist_add_head_rcu() - * as long as the traversal is guarded by rcu_read_lock(). - */ -#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = rcu_dereference((head)->first); \ - pos && ({ prefetch(pos->next); 1;}) && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = rcu_dereference(pos->next)) - #endif diff --git a/include/linux/rculist.h b/include/linux/rculist.h new file mode 100644 index 000000000000..aa9b3eb15683 --- /dev/null +++ b/include/linux/rculist.h @@ -0,0 +1,396 @@ +#ifndef _LINUX_RCULIST_H +#define _LINUX_RCULIST_H + +#ifdef __KERNEL__ + +/* + * RCU-protected list version + */ +#include + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __list_add_rcu(struct list_head *new, + struct list_head *prev, struct list_head *next) +{ + new->next = next; + new->prev = prev; + smp_wmb(); + next->prev = new; + prev->next = new; +} + +/** + * list_add_rcu - add a new entry to rcu-protected list + * @new: new entry to be added + * @head: list head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as list_add_rcu() + * or list_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). + */ +static inline void list_add_rcu(struct list_head *new, struct list_head *head) +{ + __list_add_rcu(new, head, head->next); +} + +/** + * list_add_tail_rcu - add a new entry to rcu-protected list + * @new: new entry to be added + * @head: list head to add it before + * + * Insert a new entry before the specified head. + * This is useful for implementing queues. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as list_add_tail_rcu() + * or list_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). + */ +static inline void list_add_tail_rcu(struct list_head *new, + struct list_head *head) +{ + __list_add_rcu(new, head->prev, head); +} + +/** + * list_del_rcu - deletes entry from list without re-initialization + * @entry: the element to delete from the list. + * + * Note: list_empty() on entry does not return true after this, + * the entry is in an undefined state. It is useful for RCU based + * lockfree traversal. + * + * In particular, it means that we can not poison the forward + * pointers that may still be used for walking the list. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as list_del_rcu() + * or list_add_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). + * + * Note that the caller is not permitted to immediately free + * the newly deleted entry. Instead, either synchronize_rcu() + * or call_rcu() must be used to defer freeing until an RCU + * grace period has elapsed. + */ +static inline void list_del_rcu(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + entry->prev = LIST_POISON2; +} + +/** + * list_replace_rcu - replace old entry by new one + * @old : the element to be replaced + * @new : the new element to insert + * + * The @old entry will be replaced with the @new entry atomically. + * Note: @old should not be empty. + */ +static inline void list_replace_rcu(struct list_head *old, + struct list_head *new) +{ + new->next = old->next; + new->prev = old->prev; + smp_wmb(); + new->next->prev = new; + new->prev->next = new; + old->prev = LIST_POISON2; +} + +/** + * list_splice_init_rcu - splice an RCU-protected list into an existing list. + * @list: the RCU-protected list to splice + * @head: the place in the list to splice the first list into + * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ... + * + * @head can be RCU-read traversed concurrently with this function. + * + * Note that this function blocks. + * + * Important note: the caller must take whatever action is necessary to + * prevent any other updates to @head. In principle, it is possible + * to modify the list as soon as sync() begins execution. + * If this sort of thing becomes necessary, an alternative version + * based on call_rcu() could be created. But only if -really- + * needed -- there is no shortage of RCU API members. + */ +static inline void list_splice_init_rcu(struct list_head *list, + struct list_head *head, + void (*sync)(void)) +{ + struct list_head *first = list->next; + struct list_head *last = list->prev; + struct list_head *at = head->next; + + if (list_empty(head)) + return; + + /* "first" and "last" tracking list, so initialize it. */ + + INIT_LIST_HEAD(list); + + /* + * At this point, the list body still points to the source list. + * Wait for any readers to finish using the list before splicing + * the list body into the new list. Any new readers will see + * an empty list. + */ + + sync(); + + /* + * Readers are finished with the source list, so perform splice. + * The order is important if the new list is global and accessible + * to concurrent RCU readers. Note that RCU readers are not + * permitted to traverse the prev pointers without excluding + * this function. + */ + + last->next = at; + smp_wmb(); + head->next = first; + first->prev = head; + at->prev = last; +} + +/** + * list_for_each_rcu - iterate over an rcu-protected list + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define list_for_each_rcu(pos, head) \ + for (pos = (head)->next; \ + prefetch(rcu_dereference(pos)->next), pos != (head); \ + pos = pos->next) + +#define __list_for_each_rcu(pos, head) \ + for (pos = (head)->next; \ + rcu_dereference(pos) != (head); \ + pos = pos->next) + +/** + * list_for_each_safe_rcu + * @pos: the &struct list_head to use as a loop cursor. + * @n: another &struct list_head to use as temporary storage + * @head: the head for your list. + * + * Iterate over an rcu-protected list, safe against removal of list entry. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define list_for_each_safe_rcu(pos, n, head) \ + for (pos = (head)->next; \ + n = rcu_dereference(pos)->next, pos != (head); \ + pos = n) + +/** + * list_for_each_entry_rcu - iterate over rcu list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define list_for_each_entry_rcu(pos, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member); \ + prefetch(rcu_dereference(pos)->member.next), \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member)) + + +/** + * list_for_each_continue_rcu + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + * + * Iterate over an rcu-protected list, continuing after current point. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define list_for_each_continue_rcu(pos, head) \ + for ((pos) = (pos)->next; \ + prefetch(rcu_dereference((pos))->next), (pos) != (head); \ + (pos) = (pos)->next) + +/** + * hlist_del_rcu - deletes entry from hash list without re-initialization + * @n: the element to delete from the hash list. + * + * Note: list_unhashed() on entry does not return true after this, + * the entry is in an undefined state. It is useful for RCU based + * lockfree traversal. + * + * In particular, it means that we can not poison the forward + * pointers that may still be used for walking the hash list. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry(). + */ +static inline void hlist_del_rcu(struct hlist_node *n) +{ + __hlist_del(n); + n->pprev = LIST_POISON2; +} + +/** + * hlist_replace_rcu - replace old entry by new one + * @old : the element to be replaced + * @new : the new element to insert + * + * The @old entry will be replaced with the @new entry atomically. + */ +static inline void hlist_replace_rcu(struct hlist_node *old, + struct hlist_node *new) +{ + struct hlist_node *next = old->next; + + new->next = next; + new->pprev = old->pprev; + smp_wmb(); + if (next) + new->next->pprev = &new->next; + *new->pprev = new; + old->pprev = LIST_POISON2; +} + +/** + * hlist_add_head_rcu + * @n: the element to add to the hash list. + * @h: the list to add to. + * + * Description: + * Adds the specified element to the specified hlist, + * while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. Regardless of the type of CPU, the + * list-traversal primitive must be guarded by rcu_read_lock(). + */ +static inline void hlist_add_head_rcu(struct hlist_node *n, + struct hlist_head *h) +{ + struct hlist_node *first = h->first; + n->next = first; + n->pprev = &h->first; + smp_wmb(); + if (first) + first->pprev = &n->next; + h->first = n; +} + +/** + * hlist_add_before_rcu + * @n: the new element to add to the hash list. + * @next: the existing element to add the new element before. + * + * Description: + * Adds the specified element to the specified hlist + * before the specified node while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. + */ +static inline void hlist_add_before_rcu(struct hlist_node *n, + struct hlist_node *next) +{ + n->pprev = next->pprev; + n->next = next; + smp_wmb(); + next->pprev = &n->next; + *(n->pprev) = n; +} + +/** + * hlist_add_after_rcu + * @prev: the existing element to add the new element after. + * @n: the new element to add to the hash list. + * + * Description: + * Adds the specified element to the specified hlist + * after the specified node while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. + */ +static inline void hlist_add_after_rcu(struct hlist_node *prev, + struct hlist_node *n) +{ + n->next = prev->next; + n->pprev = &prev->next; + smp_wmb(); + prev->next = n; + if (n->next) + n->next->pprev = &n->next; +} + +/** + * hlist_for_each_entry_rcu - iterate over rcu list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as hlist_add_head_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ + for (pos = (head)->first; \ + rcu_dereference(pos) && ({ prefetch(pos->next); 1; }) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ + pos = pos->next) + +#endif /* __KERNEL__ */ +#endif -- cgit v1.2.2 From 10aa9d2cf9878757b003023d33ff90a37aa3044b Mon Sep 17 00:00:00 2001 From: Franck Bui-Huu Date: Mon, 12 May 2008 21:21:06 +0200 Subject: rculist.h: use the rcu API Make almost all list mutation primitives use rcu_assign_pointer(). The main point of this being readability improvement. Signed-off-by: Franck Bui-Huu Cc: "Paul E. McKenney" Cc: Josh Triplett Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/rculist.h | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index aa9b3eb15683..8d2c81fccfe5 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -7,6 +7,7 @@ * RCU-protected list version */ #include +#include /* * Insert a new entry between two known consecutive entries. @@ -19,9 +20,8 @@ static inline void __list_add_rcu(struct list_head *new, { new->next = next; new->prev = prev; - smp_wmb(); + rcu_assign_pointer(prev->next, new); next->prev = new; - prev->next = new; } /** @@ -110,9 +110,8 @@ static inline void list_replace_rcu(struct list_head *old, { new->next = old->next; new->prev = old->prev; - smp_wmb(); + rcu_assign_pointer(new->prev->next, new); new->next->prev = new; - new->prev->next = new; old->prev = LIST_POISON2; } @@ -166,8 +165,7 @@ static inline void list_splice_init_rcu(struct list_head *list, */ last->next = at; - smp_wmb(); - head->next = first; + rcu_assign_pointer(head->next, first); first->prev = head; at->prev = last; } @@ -280,10 +278,9 @@ static inline void hlist_replace_rcu(struct hlist_node *old, new->next = next; new->pprev = old->pprev; - smp_wmb(); + rcu_assign_pointer(*new->pprev, new); if (next) new->next->pprev = &new->next; - *new->pprev = new; old->pprev = LIST_POISON2; } @@ -310,12 +307,12 @@ static inline void hlist_add_head_rcu(struct hlist_node *n, struct hlist_head *h) { struct hlist_node *first = h->first; + n->next = first; n->pprev = &h->first; - smp_wmb(); + rcu_assign_pointer(h->first, n); if (first) first->pprev = &n->next; - h->first = n; } /** @@ -341,9 +338,8 @@ static inline void hlist_add_before_rcu(struct hlist_node *n, { n->pprev = next->pprev; n->next = next; - smp_wmb(); + rcu_assign_pointer(*(n->pprev), n); next->pprev = &n->next; - *(n->pprev) = n; } /** @@ -369,8 +365,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, { n->next = prev->next; n->pprev = &prev->next; - smp_wmb(); - prev->next = n; + rcu_assign_pointer(prev->next, n); if (n->next) n->next->pprev = &n->next; } -- cgit v1.2.2 From 78b0e0e9b27b62c4b22f05a147f7a80fa58b1ae3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 12 May 2008 21:21:06 +0200 Subject: RCU, rculist.h: fix list iterators RCU list iterators: should prefetch ever be optimised out with no side-effects, the current version will lose the barrier completely. Pointed-out-by: Linus Torvalds Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/rculist.h | 48 +++++++++++++++--------------------------------- 1 file changed, 15 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 8d2c81fccfe5..b0f39be08b6c 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -180,31 +180,14 @@ static inline void list_splice_init_rcu(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_rcu(pos, head) \ - for (pos = (head)->next; \ - prefetch(rcu_dereference(pos)->next), pos != (head); \ - pos = pos->next) + for (pos = rcu_dereference((head)->next); \ + prefetch(pos->next), pos != (head); \ + pos = rcu_dereference(pos->next)) #define __list_for_each_rcu(pos, head) \ - for (pos = (head)->next; \ - rcu_dereference(pos) != (head); \ - pos = pos->next) - -/** - * list_for_each_safe_rcu - * @pos: the &struct list_head to use as a loop cursor. - * @n: another &struct list_head to use as temporary storage - * @head: the head for your list. - * - * Iterate over an rcu-protected list, safe against removal of list entry. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as list_add_rcu() - * as long as the traversal is guarded by rcu_read_lock(). - */ -#define list_for_each_safe_rcu(pos, n, head) \ - for (pos = (head)->next; \ - n = rcu_dereference(pos)->next, pos != (head); \ - pos = n) + for (pos = rcu_dereference((head)->next); \ + pos != (head); \ + pos = rcu_dereference(pos->next)) /** * list_for_each_entry_rcu - iterate over rcu list of given type @@ -217,10 +200,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_entry_rcu(pos, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member); \ - prefetch(rcu_dereference(pos)->member.next), \ - &pos->member != (head); \ - pos = list_entry(pos->member.next, typeof(*pos), member)) + for (pos = list_entry(rcu_dereference((head)->next), typeof(*pos), member); \ + prefetch(pos->member.next), &pos->member != (head); \ + pos = list_entry(rcu_dereference(pos->member.next), typeof(*pos), member)) /** @@ -235,9 +217,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_continue_rcu(pos, head) \ - for ((pos) = (pos)->next; \ - prefetch(rcu_dereference((pos))->next), (pos) != (head); \ - (pos) = (pos)->next) + for ((pos) = rcu_dereference((pos)->next); \ + prefetch((pos)->next), (pos) != (head); \ + (pos) = rcu_dereference((pos)->next)) /** * hlist_del_rcu - deletes entry from hash list without re-initialization @@ -382,10 +364,10 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, * as long as the traversal is guarded by rcu_read_lock(). */ #define hlist_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = (head)->first; \ - rcu_dereference(pos) && ({ prefetch(pos->next); 1; }) && \ + for (pos = rcu_dereference((head)->first); \ + pos && ({ prefetch(pos->next); 1; }) && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = pos->next) + pos = rcu_dereference(pos->next)) #endif /* __KERNEL__ */ #endif -- cgit v1.2.2 From 1a189b97190d3f0f8cf0379a799d3555b2d648bb Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 13 Apr 2008 21:41:55 +0100 Subject: [ARM] pxa: Add bare bones PWM API Signed-off-by: Russell King --- include/linux/pwm.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 include/linux/pwm.h (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h new file mode 100644 index 000000000000..3945f803d514 --- /dev/null +++ b/include/linux/pwm.h @@ -0,0 +1,31 @@ +#ifndef __LINUX_PWM_H +#define __LINUX_PWM_H + +struct pwm_device; + +/* + * pwm_request - request a PWM device + */ +struct pwm_device *pwm_request(int pwm_id, const char *label); + +/* + * pwm_free - free a PWM device + */ +void pwm_free(struct pwm_device *pwm); + +/* + * pwm_config - change a PWM device configuration + */ +int pwm_config(struct pwm_device *pwm, int duty_ns, int period_ns); + +/* + * pwm_enable - start a PWM output toggling + */ +int pwm_enable(struct pwm_device *pwm); + +/* + * pwm_disable - stop a PWM output toggling + */ +void pwm_disable(struct pwm_device *pwm); + +#endif /* __ASM_ARCH_PWM_H */ -- cgit v1.2.2 From bd3bff9e20f454b242d979ec2f9a4dca0d5fa06f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:41 +0200 Subject: sched: add latency tracer callbacks to the scheduler add 3 lightweight callbacks to the tracer backend. zero impact if tracing is turned off. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5395a6176f4b..717cab8a0c83 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2117,6 +2117,32 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) } #endif +#ifdef CONFIG_CONTEXT_SWITCH_TRACER +extern void +ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next); +#else +static inline void +ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) +{ +} +#endif + +#ifdef CONFIG_SCHED_TRACER +extern void +ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr); +extern void +ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr); +#else +static inline void +ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) +{ +} +static inline void +ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr) +{ +} +#endif + extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); extern long sched_getaffinity(pid_t pid, cpumask_t *mask); -- cgit v1.2.2 From 7c731e0a495e25e79dc1e9e68772a67a55721a65 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:41 +0200 Subject: ftrace: make the task state char-string visible to all The tracer wants to be able to convert the state number into a user visible character. This patch pulls that conversion string out the scheduler into the header. This way if it were to ever change, other parts of the kernel will know. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 717cab8a0c83..6e26f1fdbfe2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2237,6 +2237,8 @@ static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) } #endif /* CONFIG_MM_OWNER */ +#define TASK_STATE_TO_CHAR_STR "RSDTtZX" + #endif /* __KERNEL__ */ #endif -- cgit v1.2.2 From 502825282e6f79c975a644afc124432ec1744de4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:41 +0200 Subject: ftrace: add preempt_enable/disable notrace macros The tracer may need to call preempt_enable and disable functions for time keeping and such. The trace gets ugly when we see these functions show up for all traces. To make the output cleaner this patch adds preempt_enable_notrace and preempt_disable_notrace to be used by tracer (and debugging) functions. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/preempt.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 23f0c54175cd..36b03d50bf40 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -52,6 +52,34 @@ do { \ preempt_check_resched(); \ } while (0) +/* For debugging and tracer internals only! */ +#define add_preempt_count_notrace(val) \ + do { preempt_count() += (val); } while (0) +#define sub_preempt_count_notrace(val) \ + do { preempt_count() -= (val); } while (0) +#define inc_preempt_count_notrace() add_preempt_count_notrace(1) +#define dec_preempt_count_notrace() sub_preempt_count_notrace(1) + +#define preempt_disable_notrace() \ +do { \ + inc_preempt_count_notrace(); \ + barrier(); \ +} while (0) + +#define preempt_enable_no_resched_notrace() \ +do { \ + barrier(); \ + dec_preempt_count_notrace(); \ +} while (0) + +/* preempt_check_resched is OK to trace */ +#define preempt_enable_notrace() \ +do { \ + preempt_enable_no_resched_notrace(); \ + barrier(); \ + preempt_check_resched(); \ +} while (0) + #else #define preempt_disable() do { } while (0) @@ -59,6 +87,10 @@ do { \ #define preempt_enable() do { } while (0) #define preempt_check_resched() do { } while (0) +#define preempt_disable_notrace() do { } while (0) +#define preempt_enable_no_resched_notrace() do { } while (0) +#define preempt_enable_notrace() do { } while (0) + #endif #ifdef CONFIG_PREEMPT_NOTIFIERS -- cgit v1.2.2 From ffdc1a09ae7e2cbd714a446ee38a27f625b5f1c8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:41 +0200 Subject: tracing: add notrace to linkage.h notrace signals that a function should not be traced. Most of the time this is used by tracers to annotate code that cannot be traced - it's in a volatile state (such as in user vdso context or NMI context) or it's in the tracer internals. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/linkage.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 2119610b24f8..14f329c64ba8 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -3,6 +3,8 @@ #include +#define notrace __attribute__((no_instrument_function)) + #ifdef __cplusplus #define CPP_ASMLINKAGE extern "C" #else -- cgit v1.2.2 From 16444a8a40d4c7b4f6de34af0cae1f76a4f6c901 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: add basic support for gcc profiler instrumentation If CONFIG_FTRACE is selected and /proc/sys/kernel/ftrace_enabled is set to a non-zero value the ftrace routine will be called everytime we enter a kernel function that is not marked with the "notrace" attribute. The ftrace routine will then call a registered function if a function happens to be registered. [ This code has been highly hacked by Steven Rostedt and Ingo Molnar, so don't blame Arnaldo for all of this ;-) ] Update: It is now possible to register more than one ftrace function. If only one ftrace function is registered, that will be the function that ftrace calls directly. If more than one function is registered, then ftrace will call a function that will loop through the functions to call. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 include/linux/ftrace.h (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h new file mode 100644 index 000000000000..b96ef14c249a --- /dev/null +++ b/include/linux/ftrace.h @@ -0,0 +1,38 @@ +#ifndef _LINUX_FTRACE_H +#define _LINUX_FTRACE_H + +#ifdef CONFIG_FTRACE + +#include + +#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +#define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1)) +#define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2)) + +typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); + +struct ftrace_ops { + ftrace_func_t func; + struct ftrace_ops *next; +}; + +/* + * The ftrace_ops must be a static and should also + * be read_mostly. These functions do modify read_mostly variables + * so use them sparely. Never free an ftrace_op or modify the + * next pointer after it has been registered. Even after unregistering + * it, the next pointer may still be used internally. + */ +int register_ftrace_function(struct ftrace_ops *ops); +int unregister_ftrace_function(struct ftrace_ops *ops); +void clear_ftrace_function(void); + +extern void ftrace_stub(unsigned long a0, unsigned long a1); +extern void mcount(void); + +#else /* !CONFIG_FTRACE */ +# define register_ftrace_function(ops) do { } while (0) +# define unregister_ftrace_function(ops) do { } while (0) +# define clear_ftrace_function(ops) do { } while (0) +#endif /* CONFIG_FTRACE */ +#endif /* _LINUX_FTRACE_H */ -- cgit v1.2.2 From 352ad25aa4a189c667cb2af333948d34692a2d27 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: tracer for scheduler wakeup latency This patch adds the tracer that tracks the wakeup latency of the highest priority waking task. "wakeup" is added to /debugfs/tracing/available_tracers Also added to /debugfs/tracing tracing_max_latency holds the current max latency for the wakeup wakeup_thresh if set to other than zero, a log will be recorded for every wakeup that takes longer than the number entered in here (usecs for all counters) (deletes previous trace) Examples: (with ftrace_enabled = 0) ============ preemption latency trace v1.1.5 on 2.6.24-rc8 Signed-off-by: Ingo Molnar -------------------------------------------------------------------- latency: 26 us, #2/2, CPU#1 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2) ----------------- | task: migration/0-3 (uid:0 nice:-5 policy:1 rt_prio:99) ----------------- _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / ||||| delay cmd pid ||||| time | caller \ / ||||| \ | / quilt-8551 0d..3 0us+: wake_up_process+0x15/0x17 (sched_exec+0xc9/0x100 ) quilt-8551 0d..4 26us : sched_switch_callback+0x73/0x81 (schedule+0x483/0x6d5 ) vim:ft=help ============ (with ftrace_enabled = 1) ============ preemption latency trace v1.1.5 on 2.6.24-rc8 -------------------------------------------------------------------- latency: 36 us, #45/45, CPU#0 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2) ----------------- | task: migration/1-5 (uid:0 nice:-5 policy:1 rt_prio:99) ----------------- _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / ||||| delay cmd pid ||||| time | caller \ / ||||| \ | / bash-10653 1d..3 0us : wake_up_process+0x15/0x17 (sched_exec+0xc9/0x100 ) bash-10653 1d..3 1us : try_to_wake_up+0x271/0x2e7 (sub_preempt_count+0xc/0x7a ) bash-10653 1d..2 2us : try_to_wake_up+0x296/0x2e7 (update_rq_clock+0x9/0x20 ) bash-10653 1d..2 2us : update_rq_clock+0x1e/0x20 (__update_rq_clock+0xc/0x90 ) bash-10653 1d..2 3us : __update_rq_clock+0x1b/0x90 (sched_clock+0x9/0x29 ) bash-10653 1d..2 4us : try_to_wake_up+0x2a6/0x2e7 (activate_task+0xc/0x3f ) bash-10653 1d..2 4us : activate_task+0x2d/0x3f (enqueue_task+0xe/0x66 ) bash-10653 1d..2 5us : enqueue_task+0x5b/0x66 (enqueue_task_rt+0x9/0x3c ) bash-10653 1d..2 6us : try_to_wake_up+0x2ba/0x2e7 (check_preempt_wakeup+0x12/0x99 ) [...] bash-10653 1d..5 33us : tracing_record_cmdline+0xcf/0xd4 (_spin_unlock+0x9/0x33 ) bash-10653 1d..5 34us : _spin_unlock+0x19/0x33 (sub_preempt_count+0xc/0x7a ) bash-10653 1d..4 35us : wakeup_sched_switch+0x65/0x2ff (_spin_lock_irqsave+0xc/0xa9 ) bash-10653 1d..4 35us : _spin_lock_irqsave+0x19/0xa9 (add_preempt_count+0xe/0x77 ) bash-10653 1d..4 36us : sched_switch_callback+0x73/0x81 (schedule+0x483/0x6d5 ) vim:ft=help ============ The [...] was added here to not waste your email box space. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b96ef14c249a..db8a5e7abe41 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -5,10 +5,6 @@ #include -#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) -#define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1)) -#define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2)) - typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); struct ftrace_ops { @@ -35,4 +31,23 @@ extern void mcount(void); # define unregister_ftrace_function(ops) do { } while (0) # define clear_ftrace_function(ops) do { } while (0) #endif /* CONFIG_FTRACE */ + + +#ifdef CONFIG_FRAME_POINTER +/* TODO: need to fix this for ARM */ +# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +# define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1)) +# define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2)) +# define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3)) +# define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4)) +# define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5)) +#else +# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +# define CALLER_ADDR1 0UL +# define CALLER_ADDR2 0UL +# define CALLER_ADDR3 0UL +# define CALLER_ADDR4 0UL +# define CALLER_ADDR5 0UL +#endif + #endif /* _LINUX_FTRACE_H */ -- cgit v1.2.2 From 81d68a96a39844853b37f20cc8282d9b65b78ef3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: trace irq disabled critical timings This patch adds latency tracing for critical timings (how long interrupts are disabled for). "irqsoff" is added to /debugfs/tracing/available_tracers Note: tracing_max_latency also holds the max latency for irqsoff (in usecs). (default to large number so one must start latency tracing) tracing_thresh threshold (in usecs) to always print out if irqs off is detected to be longer than stated here. If irq_thresh is non-zero, then max_irq_latency is ignored. Here's an example of a trace with ftrace_enabled = 0 ======= preemption latency trace v1.1.5 on 2.6.24-rc7 Signed-off-by: Ingo Molnar -------------------------------------------------------------------- latency: 100 us, #3/3, CPU#1 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2) ----------------- | task: swapper-0 (uid:0 nice:0 policy:0 rt_prio:0) ----------------- => started at: _spin_lock_irqsave+0x2a/0xb7 => ended at: _spin_unlock_irqrestore+0x32/0x5f _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / ||||| delay cmd pid ||||| time | caller \ / ||||| \ | / swapper-0 1d.s3 0us+: _spin_lock_irqsave+0x2a/0xb7 (e1000_update_stats+0x47/0x64c [e1000]) swapper-0 1d.s3 100us : _spin_unlock_irqrestore+0x32/0x5f (e1000_update_stats+0x641/0x64c [e1000]) swapper-0 1d.s3 100us : trace_hardirqs_on_caller+0x75/0x89 (_spin_unlock_irqrestore+0x32/0x5f) vim:ft=help ======= And this is a trace with ftrace_enabled == 1 ======= preemption latency trace v1.1.5 on 2.6.24-rc7 -------------------------------------------------------------------- latency: 102 us, #12/12, CPU#1 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2) ----------------- | task: swapper-0 (uid:0 nice:0 policy:0 rt_prio:0) ----------------- => started at: _spin_lock_irqsave+0x2a/0xb7 => ended at: _spin_unlock_irqrestore+0x32/0x5f _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / ||||| delay cmd pid ||||| time | caller \ / ||||| \ | / swapper-0 1dNs3 0us+: _spin_lock_irqsave+0x2a/0xb7 (e1000_update_stats+0x47/0x64c [e1000]) swapper-0 1dNs3 46us : e1000_read_phy_reg+0x16/0x225 [e1000] (e1000_update_stats+0x5e2/0x64c [e1000]) swapper-0 1dNs3 46us : e1000_swfw_sync_acquire+0x10/0x99 [e1000] (e1000_read_phy_reg+0x49/0x225 [e1000]) swapper-0 1dNs3 46us : e1000_get_hw_eeprom_semaphore+0x12/0xa6 [e1000] (e1000_swfw_sync_acquire+0x36/0x99 [e1000]) swapper-0 1dNs3 47us : __const_udelay+0x9/0x47 (e1000_read_phy_reg+0x116/0x225 [e1000]) swapper-0 1dNs3 47us+: __delay+0x9/0x50 (__const_udelay+0x45/0x47) swapper-0 1dNs3 97us : preempt_schedule+0xc/0x84 (__delay+0x4e/0x50) swapper-0 1dNs3 98us : e1000_swfw_sync_release+0xc/0x55 [e1000] (e1000_read_phy_reg+0x211/0x225 [e1000]) swapper-0 1dNs3 99us+: e1000_put_hw_eeprom_semaphore+0x9/0x35 [e1000] (e1000_swfw_sync_release+0x50/0x55 [e1000]) swapper-0 1dNs3 101us : _spin_unlock_irqrestore+0xe/0x5f (e1000_update_stats+0x641/0x64c [e1000]) swapper-0 1dNs3 102us : _spin_unlock_irqrestore+0x32/0x5f (e1000_update_stats+0x641/0x64c [e1000]) swapper-0 1dNs3 102us : trace_hardirqs_on_caller+0x75/0x89 (_spin_unlock_irqrestore+0x32/0x5f) vim:ft=help ======= Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 8 ++++++++ include/linux/irqflags.h | 12 ++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index db8a5e7abe41..0a20445dcbcc 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -50,4 +50,12 @@ extern void mcount(void); # define CALLER_ADDR5 0UL #endif +#ifdef CONFIG_IRQSOFF_TRACER + extern void notrace time_hardirqs_on(unsigned long a0, unsigned long a1); + extern void notrace time_hardirqs_off(unsigned long a0, unsigned long a1); +#else +# define time_hardirqs_on(a0, a1) do { } while (0) +# define time_hardirqs_off(a0, a1) do { } while (0) +#endif + #endif /* _LINUX_FTRACE_H */ diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index e600c4e9b8c5..5b711d4e9fd9 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -12,10 +12,10 @@ #define _LINUX_TRACE_IRQFLAGS_H #ifdef CONFIG_TRACE_IRQFLAGS - extern void trace_hardirqs_on(void); - extern void trace_hardirqs_off(void); extern void trace_softirqs_on(unsigned long ip); extern void trace_softirqs_off(unsigned long ip); + extern void trace_hardirqs_on(void); + extern void trace_hardirqs_off(void); # define trace_hardirq_context(p) ((p)->hardirq_context) # define trace_softirq_context(p) ((p)->softirq_context) # define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled) @@ -41,6 +41,14 @@ # define INIT_TRACE_IRQFLAGS #endif +#ifdef CONFIG_IRQSOFF_TRACER + extern void stop_critical_timings(void); + extern void start_critical_timings(void); +#else +# define stop_critical_timings() do { } while (0) +# define start_critical_timings() do { } while (0) +#endif + #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT #include -- cgit v1.2.2 From 6cd8a4bb2f97527a9ceb30bc77ea4e959c6a95e3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: trace preempt off critical timings Add preempt off timings. A lot of kernel core code is taken from the RT patch latency trace that was written by Ingo Molnar. This adds "preemptoff" and "preemptirqsoff" to /debugfs/tracing/available_tracers Now instead of just tracing irqs off, preemption off can be selected to be recorded. When this is selected, it shares the same files as irqs off timings. One can either trace preemption off, irqs off, or one or the other off. By echoing "preemptoff" into /debugfs/tracing/current_tracer, recording of preempt off only is performed. "irqsoff" will only record the time irqs are disabled, but "preemptirqsoff" will take the total time irqs or preemption are disabled. Runtime switching of these options is now supported by simpling echoing in the appropriate trace name into /debugfs/tracing/current_tracer. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 8 ++++++++ include/linux/irqflags.h | 3 ++- include/linux/preempt.h | 2 +- 3 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0a20445dcbcc..740c97dcf9cb 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -58,4 +58,12 @@ extern void mcount(void); # define time_hardirqs_off(a0, a1) do { } while (0) #endif +#ifdef CONFIG_PREEMPT_TRACER + extern void notrace trace_preempt_on(unsigned long a0, unsigned long a1); + extern void notrace trace_preempt_off(unsigned long a0, unsigned long a1); +#else +# define trace_preempt_on(a0, a1) do { } while (0) +# define trace_preempt_off(a0, a1) do { } while (0) +#endif + #endif /* _LINUX_FTRACE_H */ diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 5b711d4e9fd9..2b1c2e58566e 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -41,7 +41,8 @@ # define INIT_TRACE_IRQFLAGS #endif -#ifdef CONFIG_IRQSOFF_TRACER +#if defined(CONFIG_IRQSOFF_TRACER) || \ + defined(CONFIG_PREEMPT_TRACER) extern void stop_critical_timings(void); extern void start_critical_timings(void); #else diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 36b03d50bf40..72b1a10a59b6 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -10,7 +10,7 @@ #include #include -#ifdef CONFIG_DEBUG_PREEMPT +#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) extern void add_preempt_count(int val); extern void sub_preempt_count(int val); #else -- cgit v1.2.2 From 3d0833953e1b98b79ddf491dd49229eef9baeac1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: dynamic enabling/disabling of function calls This patch adds a feature to dynamically replace the ftrace code with the jmps to allow a kernel with ftrace configured to run as fast as it can without it configured. The way this works, is on bootup (if ftrace is enabled), a ftrace function is registered to record the instruction pointer of all places that call the function. Later, if there's still any code to patch, a kthread is awoken (rate limited to at most once a second) that performs a stop_machine, and replaces all the code that was called with a jmp over the call to ftrace. It only replaces what was found the previous time. Typically the system reaches equilibrium quickly after bootup and there's no code patching needed at all. e.g. call ftrace /* 5 bytes */ is replaced with jmp 3f /* jmp is 2 bytes and we jump 3 forward */ 3: When we want to enable ftrace for function tracing, the IP recording is removed, and stop_machine is called again to replace all the locations of that were recorded back to the call of ftrace. When it is disabled, we replace the code back to the jmp. Allocation is done by the kthread. If the ftrace recording function is called, and we don't have any record slots available, then we simply skip that call. Once a second a new page (if needed) is allocated for recording new ftrace function calls. A large batch is allocated at boot up to get most of the calls there. Because we do this via stop_machine, we don't have to worry about another CPU executing a ftrace call as we modify it. But we do need to worry about NMI's so all functions that might be called via nmi must be annotated with notrace_nmi. When this code is configured in, the NMI code will not call notrace. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 740c97dcf9cb..90dbc0ee2046 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -32,6 +32,24 @@ extern void mcount(void); # define clear_ftrace_function(ops) do { } while (0) #endif /* CONFIG_FTRACE */ +#ifdef CONFIG_DYNAMIC_FTRACE +# define FTRACE_HASHBITS 10 +# define FTRACE_HASHSIZE (1< Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: add ftrace_enabled sysctl to disable mcount function This patch adds back the sysctl ftrace_enabled. This time it is defaulted to on, if DYNAMIC_FTRACE is configured. When ftrace_enabled is disabled, the ftrace function is set to the stub return. If DYNAMIC_FTRACE is also configured, on ftrace_enabled = 0, the registered ftrace functions will all be set to jmps, but no more new calls to ftrace recording (used to find the ftrace calling sites) will be called. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 90dbc0ee2046..ccd8537dbdb7 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -5,6 +5,12 @@ #include +extern int ftrace_enabled; +extern int +ftrace_enable_sysctl(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos); + typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); struct ftrace_ops { -- cgit v1.2.2 From 3c1720f00bb619302ba19d55986ab565e74d06db Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: move memory management out of arch code This patch moves the memory management of the ftrace records out of the arch code and into the generic code making the arch code simpler. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ccd8537dbdb7..d509ad6c9cb8 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -42,19 +42,23 @@ extern void mcount(void); # define FTRACE_HASHBITS 10 # define FTRACE_HASHSIZE (1< Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: use dynamic patching for updating mcount calls This patch replaces the indirect call to the mcount function pointer with a direct call that will be patched by the dynamic ftrace routines. On boot up, the mcount function calls the ftace_stub function. When the dynamic ftrace code is initialized, the ftrace_stub is replaced with a call to the ftrace_record_ip, which records the instruction pointers of the locations that call it. Later, the ftraced daemon will call kstop_machine and patch all the locations to nops. When a ftrace is enabled, the original calls to mcount will now be set top call ftrace_caller, which will do a direct call to the registered ftrace function. This direct call is also patched when the function that should be called is updated. All patching is performed by a kstop_machine routine to prevent any type of race conditions that is associated with modifying code on the fly. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index d509ad6c9cb8..b0dd0093058f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -56,9 +56,14 @@ struct dyn_ftrace { extern int ftrace_ip_converted(unsigned long ip); extern unsigned char *ftrace_nop_replace(void); extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr); -extern int ftrace_dyn_arch_init(void); +extern int ftrace_dyn_arch_init(void *data); +extern int ftrace_mcount_set(unsigned long *data); extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code); +extern int ftrace_update_ftrace_func(ftrace_func_t func); +extern void ftrace_caller(void); +extern void ftrace_call(void); +extern void mcount_call(void); #endif #ifdef CONFIG_FRAME_POINTER -- cgit v1.2.2 From 5072c59fd45e9976d02ee6f18c7336ef97623cbc Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: add filter select functions to trace This patch adds two files to the debugfs system: /debugfs/tracing/available_filter_functions and /debugfs/tracing/set_ftrace_filter The available_filter_functions lists all functions that has been recorded by the ftraced that has called the ftrace_record_ip function. This is to allow users to see what functions have been converted to nops and can be enabled for tracing. To enable functions, simply echo the names (whitespace delimited) into set_ftrace_filter. Simple wildcards are also allowed. echo 'scheduler' > /debugfs/tracing/set_ftrace_filter Will have only the scheduler be activated when tracing is enabled. echo 'sched_*' > /debugfs/tracing/set_ftrace_filter Will have only the functions starting with 'sched_' be activated. echo '*lock' > /debugfs/tracing/set_ftrace_filter Will have only functions ending with 'lock' be activated. echo '*lock*' > /debugfs/tracing/set_ftrace_filter Will have only functions with 'lock' in its name be activated. Note: 'sched*lock' will not work. The only wildcards that are allowed is an asterisk and the beginning and or end of the string passed in. Multiple names can be passed in with whitespace delimited: echo 'scheduler *lock *acpi*' > /debugfs/tracing/set_ftrace_filter is also the same as: echo 'scheduler' > /debugfs/tracing/set_ftrace_filter echo '*lock' >> /debugfs/tracing/set_ftrace_filter echo '*acpi*' >> /debugfs/tracing/set_ftrace_filter Appending does just that. It appends to the list. To disable all filters simply echo an empty line in: echo > /debugfs/tracing/set_ftrace_filter Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b0dd0093058f..f5911d2d42c3 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -43,7 +43,9 @@ extern void mcount(void); # define FTRACE_HASHSIZE (1< Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: fix kexec disable the tracer while kexec pulls the rug from under the old kernel. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f5911d2d42c3..a42390c1d6e1 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -68,6 +68,13 @@ extern void ftrace_call(void); extern void mcount_call(void); #endif +static inline void tracer_disable(void) +{ +#ifdef CONFIG_FTRACE + ftrace_enabled = 0; +#endif +} + #ifdef CONFIG_FRAME_POINTER /* TODO: need to fix this for ARM */ # define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) -- cgit v1.2.2 From e1c08bdd9fa73e44096e5a82c0d5928b04ab02c8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:44 +0200 Subject: ftrace: force recording Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index a42390c1d6e1..2c1670c65236 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -54,6 +54,8 @@ struct dyn_ftrace { unsigned long flags; }; +int ftrace_force_update(void); + /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); extern unsigned char *ftrace_nop_replace(void); @@ -66,6 +68,8 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); +#else +# define ftrace_force_update() do { } while (0) #endif static inline void tracer_disable(void) -- cgit v1.2.2 From c7aafc549766b87819285d3480648fc652a47bc4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:45 +0200 Subject: ftrace: cleanups factor out code and clean it up. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2c1670c65236..953a36d6a199 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -69,7 +69,7 @@ extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); #else -# define ftrace_force_update() do { } while (0) +# define ftrace_force_update() ({ 0; }) #endif static inline void tracer_disable(void) -- cgit v1.2.2 From 77a2b37d227483fe52aead242652aee406c25bf0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:45 +0200 Subject: ftrace: startup tester on dynamic tracing. This patch adds a startup self test on dynamic code modification and filters. The test filters on a specific function, makes sure that no other function is traced, exectutes the function, then makes sure that the function is traced. This patch also fixes a slight bug with the ftrace selftest, where tracer_enabled was not being set. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 953a36d6a199..a842d96c6343 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -55,6 +55,7 @@ struct dyn_ftrace { }; int ftrace_force_update(void); +void ftrace_set_filter(unsigned char *buf, int len, int reset); /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); @@ -70,6 +71,7 @@ extern void ftrace_call(void); extern void mcount_call(void); #else # define ftrace_force_update() ({ 0; }) +# define ftrace_set_filter(buf, len, reset) do { } while (0) #endif static inline void tracer_disable(void) -- cgit v1.2.2 From 37ad508419f0fdfda7b378756eb1f35cfd26d96d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:48 +0200 Subject: ftrace - fix dynamic ftrace memory leak The ftrace dynamic function update allocates a record to store the instruction pointers that are being modified. If the modified instruction pointer fails to update, then the record is marked as failed and nothing more is done. Worse, if the modification fails, but the record ip function is still called, it will allocate a new record and try again. In just a matter of time, will this cause a serious memory leak and crash the system. This patch plugs this memory leak. When a record fails, it is included back into the pool of records to be used. Now a record may fail over and over again, but the number of allocated records will not increase. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index a842d96c6343..61e757bd2350 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -43,9 +43,10 @@ extern void mcount(void); # define FTRACE_HASHSIZE (1< Date: Mon, 12 May 2008 21:20:48 +0200 Subject: ftrace: disable tracing on failure Since ftrace touches practically every function. If we detect any anomaly, we want to fully disable ftrace. This patch adds code to try shutdown ftrace as much as possible without doing any more harm is something is detected not quite correct. This only kills ftrace, this patch does have checks for other parts of the tracer (irqsoff, wakeup, etc.). Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 61e757bd2350..4650a3160b7f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -58,6 +58,9 @@ struct dyn_ftrace { int ftrace_force_update(void); void ftrace_set_filter(unsigned char *buf, int len, int reset); +/* totally disable ftrace - can not re-enable after this */ +void ftrace_kill(void); + /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); extern unsigned char *ftrace_nop_replace(void); -- cgit v1.2.2 From aeaee8a2c9cb4489f166ca0e39c568e8254faaa6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:49 +0200 Subject: ftrace: build fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4650a3160b7f..08fbef1744cc 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -58,9 +58,6 @@ struct dyn_ftrace { int ftrace_force_update(void); void ftrace_set_filter(unsigned char *buf, int len, int reset); -/* totally disable ftrace - can not re-enable after this */ -void ftrace_kill(void); - /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); extern unsigned char *ftrace_nop_replace(void); @@ -74,10 +71,13 @@ extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); #else -# define ftrace_force_update() ({ 0; }) -# define ftrace_set_filter(buf, len, reset) do { } while (0) +# define ftrace_force_update() ({ 0; }) +# define ftrace_set_filter(buf, len, reset) do { } while (0) #endif +/* totally disable ftrace - can not re-enable after this */ +void ftrace_kill(void); + static inline void tracer_disable(void) { #ifdef CONFIG_FTRACE -- cgit v1.2.2 From 86387f7ee5d3273ff4859e2c64ce656639b6ca65 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:51 +0200 Subject: ftrace: add stack tracing Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 08fbef1744cc..0d3714e7110b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -93,6 +93,7 @@ static inline void tracer_disable(void) # define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3)) # define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4)) # define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5)) +# define CALLER_ADDR6 ((unsigned long)__builtin_return_address(6)) #else # define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) # define CALLER_ADDR1 0UL @@ -100,6 +101,7 @@ static inline void tracer_disable(void) # define CALLER_ADDR3 0UL # define CALLER_ADDR4 0UL # define CALLER_ADDR5 0UL +# define CALLER_ADDR6 0UL #endif #ifdef CONFIG_IRQSOFF_TRACER -- cgit v1.2.2 From 8ac0fca4ccb355ce50471d7aa3f10f5900b28b95 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:51 +0200 Subject: ftrace: sched tracer fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6e26f1fdbfe2..05744f9cb096 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2130,17 +2130,11 @@ ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) #ifdef CONFIG_SCHED_TRACER extern void ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr); -extern void -ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr); #else static inline void ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) { } -static inline void -ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr) -{ -} #endif extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); -- cgit v1.2.2 From 4e65551905fb0300ae7e667cbaa41ee2e3f29a13 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: sched tracer, trace full rbtree Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 05744f9cb096..652d380ae563 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2119,20 +2119,34 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) #ifdef CONFIG_CONTEXT_SWITCH_TRACER extern void -ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next); +ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next); +extern void +ftrace_wake_up_task(void *rq, struct task_struct *wakee, + struct task_struct *curr); +extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); +extern void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3); #else static inline void -ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) +ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next) +{ +} +static inline void +sched_trace_special(unsigned long p1, unsigned long p2, unsigned long p3) +{ +} +static inline void +ftrace_wake_up_task(void *rq, struct task_struct *wakee, + struct task_struct *curr) +{ +} +static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) { } -#endif - -#ifdef CONFIG_SCHED_TRACER -extern void -ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr); -#else static inline void -ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3) { } #endif -- cgit v1.2.2 From 017730c11241e26577673eb9d957cfc66172ea91 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: fix wakeups Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 652d380ae563..a3970b563757 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -246,6 +246,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern void init_idle_bootup_task(struct task_struct *idle); +extern int runqueue_is_locked(void); + extern cpumask_t nohz_cpu_mask; #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) extern int select_nohz_load_balancer(int cpu); -- cgit v1.2.2 From 1a3c3034336320554a3342572dae98d69e054fc7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: fix __trace_special() Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index a3970b563757..5b186bed54bc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2119,6 +2119,18 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) } #endif +#ifdef CONFIG_TRACING +extern void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3); +#else +static inline void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ +} +#endif + #ifdef CONFIG_CONTEXT_SWITCH_TRACER extern void ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next); @@ -2126,9 +2138,6 @@ extern void ftrace_wake_up_task(void *rq, struct task_struct *wakee, struct task_struct *curr); extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); -extern void -__trace_special(void *__tr, void *__data, - unsigned long arg1, unsigned long arg2, unsigned long arg3); #else static inline void ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next) @@ -2146,11 +2155,6 @@ ftrace_wake_up_task(void *rq, struct task_struct *wakee, static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) { } -static inline void -__trace_special(void *__tr, void *__data, - unsigned long arg1, unsigned long arg2, unsigned long arg3) -{ -} #endif extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); -- cgit v1.2.2 From 88a4216c3ec4281fc7e6725cc3a3ccd01fb1aa14 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:53 +0200 Subject: ftrace: sched special Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5b186bed54bc..360ca99033d2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2138,6 +2138,8 @@ extern void ftrace_wake_up_task(void *rq, struct task_struct *wakee, struct task_struct *curr); extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); +extern void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); #else static inline void ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next) @@ -2155,6 +2157,10 @@ ftrace_wake_up_task(void *rq, struct task_struct *wakee, static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) { } +static inline void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ +} #endif extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); -- cgit v1.2.2 From 3eefae994d9224fb7771a3ddb683868363c23510 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:04 +0200 Subject: ftrace: limit trace entries Currently there is no protection from the root user to use up all of memory for trace buffers. If the root user allocates too many entries, the OOM killer might start kill off all tasks. This patch adds an algorith to check the following condition: pages_requested > (freeable_memory + current_trace_buffer_pages) / 4 If the above is met then the allocation fails. The above prevents more than 1/4th of freeable memory from being used by trace buffers. To determine the freeable_memory, I made determine_dirtyable_memory in mm/page-writeback.c global. Special thanks goes to Peter Zijlstra for suggesting the above calculation. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/writeback.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index f462439cc288..bd91987c065f 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -105,6 +105,8 @@ extern int vm_highmem_is_dirtyable; extern int block_dump; extern int laptop_mode; +extern unsigned long determine_dirtyable_memory(void); + extern int dirty_ratio_handler(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); -- cgit v1.2.2 From dc102a8fae2d0d6bf5223fc549247f2e23959ae6 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 12 May 2008 21:21:09 +0200 Subject: Markers - remove extra format argument Denys Vlasenko : > Not in this patch, but I noticed: > > #define __trace_mark(name, call_private, format, args...) \ > do { \ > static const char __mstrtab_##name[] \ > __attribute__((section("__markers_strings"))) \ > = #name "\0" format; \ > static struct marker __mark_##name \ > __attribute__((section("__markers"), aligned(8))) = \ > { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ > 0, 0, marker_probe_cb, \ > { __mark_empty_function, NULL}, NULL }; \ > __mark_check_format(format, ## args); \ > if (unlikely(__mark_##name.state)) { \ > (*__mark_##name.call) \ > (&__mark_##name, call_private, \ > format, ## args); \ > } \ > } while (0) > > In this call: > > (*__mark_##name.call) \ > (&__mark_##name, call_private, \ > format, ## args); \ > > you make gcc allocate duplicate format string. You can use > &__mstrtab_##name[sizeof(#name)] instead since it holds the same string, > or drop ", format," above and "const char *fmt" from here: > > void (*call)(const struct marker *mdata, /* Probe wrapper */ > void *call_private, const char *fmt, ...); > > since mdata->format is the same and all callees which need it can take it there. Very good point. I actually thought about dropping it, since it would remove an unnecessary argument from the stack. And actually, since I now have the marker_probe_cb sitting between the marker site and the callbacks, there is no API change required. Thanks :) Mathieu Signed-off-by: Mathieu Desnoyers CC: Denys Vlasenko Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/marker.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/marker.h b/include/linux/marker.h index 430f6adf9762..338533abb47b 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -44,8 +44,8 @@ struct marker { */ char state; /* Marker state. */ char ptype; /* probe type : 0 : single, 1 : multi */ - void (*call)(const struct marker *mdata, /* Probe wrapper */ - void *call_private, const char *fmt, ...); + /* Probe wrapper */ + void (*call)(const struct marker *mdata, void *call_private, ...); struct marker_probe_closure single; struct marker_probe_closure *multi; } __attribute__((aligned(8))); @@ -72,8 +72,7 @@ struct marker { __mark_check_format(format, ## args); \ if (unlikely(__mark_##name.state)) { \ (*__mark_##name.call) \ - (&__mark_##name, call_private, \ - format, ## args); \ + (&__mark_##name, call_private, ## args);\ } \ } while (0) @@ -117,9 +116,9 @@ static inline void __printf(1, 2) ___mark_check_format(const char *fmt, ...) extern marker_probe_func __mark_empty_function; extern void marker_probe_cb(const struct marker *mdata, - void *call_private, const char *fmt, ...); + void *call_private, ...); extern void marker_probe_cb_noarg(const struct marker *mdata, - void *call_private, const char *fmt, ...); + void *call_private, ...); /* * Connect a probe to a marker. -- cgit v1.2.2 From 0aa977f592f17004f9d1d545f2e1bb9ea71896c3 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 12 May 2008 21:21:10 +0200 Subject: Markers - define non optimized marker To support the forthcoming "immediate values" marker optimization, we must have a way to declare markers in few code paths that does not use instruction modification based enable. This will be the case of printk(), some traps and eventually lockdep instrumentation. Changelog : - Fix reversed boolean logic of "generic". Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/marker.h | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/marker.h b/include/linux/marker.h index 338533abb47b..1290653f9241 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -58,8 +58,12 @@ struct marker { * Make sure the alignment of the structure in the __markers section will * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. + * + * The "generic" argument controls which marker enabling mechanism must be used. + * If generic is true, a variable read is used. + * If generic is false, immediate values are used. */ -#define __trace_mark(name, call_private, format, args...) \ +#define __trace_mark(generic, name, call_private, format, args...) \ do { \ static const char __mstrtab_##name[] \ __attribute__((section("__markers_strings"))) \ @@ -79,7 +83,7 @@ struct marker { extern void marker_update_probe_range(struct marker *begin, struct marker *end); #else /* !CONFIG_MARKERS */ -#define __trace_mark(name, call_private, format, args...) \ +#define __trace_mark(generic, name, call_private, format, args...) \ __mark_check_format(format, ## args) static inline void marker_update_probe_range(struct marker *begin, struct marker *end) @@ -87,15 +91,30 @@ static inline void marker_update_probe_range(struct marker *begin, #endif /* CONFIG_MARKERS */ /** - * trace_mark - Marker + * trace_mark - Marker using code patching * @name: marker name, not quoted. * @format: format string * @args...: variable argument list * - * Places a marker. + * Places a marker using optimized code patching technique (imv_read()) + * to be enabled when immediate values are present. */ #define trace_mark(name, format, args...) \ - __trace_mark(name, NULL, format, ## args) + __trace_mark(0, name, NULL, format, ## args) + +/** + * _trace_mark - Marker using variable read + * @name: marker name, not quoted. + * @format: format string + * @args...: variable argument list + * + * Places a marker using a standard memory read (_imv_read()) to be + * enabled. Should be used for markers in code paths where instruction + * modification based enabling is not welcome. (__init and __exit functions, + * lockdep, some traps, printk). + */ +#define _trace_mark(name, format, args...) \ + __trace_mark(1, name, NULL, format, ## args) /** * MARK_NOARGS - Format string for a marker with no argument. -- cgit v1.2.2 From 5b82a1b08a00b2adca3d9dd9777efff40b7aaaa1 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 12 May 2008 21:21:10 +0200 Subject: Port ftrace to markers Porting ftrace to the marker infrastructure. Don't need to chain to the wakeup tracer from the sched tracer, because markers support multiple probes connected. Signed-off-by: Mathieu Desnoyers CC: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 360ca99033d2..c0b1c69b55ce 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2131,38 +2131,6 @@ __trace_special(void *__tr, void *__data, } #endif -#ifdef CONFIG_CONTEXT_SWITCH_TRACER -extern void -ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next); -extern void -ftrace_wake_up_task(void *rq, struct task_struct *wakee, - struct task_struct *curr); -extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); -extern void -ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); -#else -static inline void -ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next) -{ -} -static inline void -sched_trace_special(unsigned long p1, unsigned long p2, unsigned long p3) -{ -} -static inline void -ftrace_wake_up_task(void *rq, struct task_struct *wakee, - struct task_struct *curr) -{ -} -static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) -{ -} -static inline void -ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) -{ -} -#endif - extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); extern long sched_getaffinity(pid_t pid, cpumask_t *mask); -- cgit v1.2.2 From 74f4e369fc5b52433ad824cef32d3bf1304549be Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:21:15 +0200 Subject: ftrace: stacktrace fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0d3714e7110b..017ab44d572a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -120,4 +120,12 @@ static inline void tracer_disable(void) # define trace_preempt_off(a0, a1) do { } while (0) #endif +#ifdef CONFIG_CONTEXT_SWITCH_TRACER +extern void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); +#else +static inline void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } +#endif + #endif /* _LINUX_FTRACE_H */ -- cgit v1.2.2 From d49dbf33f0bf8748ee3662b973eb57e60525d622 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 16 May 2008 10:41:53 +0200 Subject: ftrace: fix include file dependency Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 017ab44d572a..911d5d80b49f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -4,6 +4,7 @@ #ifdef CONFIG_FTRACE #include +#include extern int ftrace_enabled; extern int -- cgit v1.2.2 From 489f139614596cbc956a06f5e4bb41288e276fe3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 25 Feb 2008 13:38:05 +0100 Subject: ftrace: fix build bug Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 911d5d80b49f..922e23d0196f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -106,16 +106,16 @@ static inline void tracer_disable(void) #endif #ifdef CONFIG_IRQSOFF_TRACER - extern void notrace time_hardirqs_on(unsigned long a0, unsigned long a1); - extern void notrace time_hardirqs_off(unsigned long a0, unsigned long a1); + extern void time_hardirqs_on(unsigned long a0, unsigned long a1); + extern void time_hardirqs_off(unsigned long a0, unsigned long a1); #else # define time_hardirqs_on(a0, a1) do { } while (0) # define time_hardirqs_off(a0, a1) do { } while (0) #endif #ifdef CONFIG_PREEMPT_TRACER - extern void notrace trace_preempt_on(unsigned long a0, unsigned long a1); - extern void notrace trace_preempt_off(unsigned long a0, unsigned long a1); + extern void trace_preempt_on(unsigned long a0, unsigned long a1); + extern void trace_preempt_off(unsigned long a0, unsigned long a1); #else # define trace_preempt_on(a0, a1) do { } while (0) # define trace_preempt_off(a0, a1) do { } while (0) -- cgit v1.2.2 From 8b7d89d02ef3c6a7c73d6596f28cea7632850af4 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:56 +0200 Subject: x86: mmiotrace - trace memory mapped IO Mmiotrace is a tool for trapping memory mapped IO (MMIO) accesses within the kernel. It is used for debugging and especially for reverse engineering evil binary drivers. Mmiotrace works by wrapping the ioremap family of kernel functions and marking the returned pages as not present. Access to the IO memory triggers a page fault, which will be handled by mmiotrace's custom page fault handler. This will single-step the faulted instruction with the MMIO page marked as present. Access logs are directed to user space via relay and debug_fs. This page fault approach is necessary, because binary drivers have readl/writel etc. calls inlined and therefore extremely difficult to trap with with e.g. kprobes. This patch depends on the custom page fault handlers patch. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mmiotrace.h | 62 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 include/linux/mmiotrace.h (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h new file mode 100644 index 000000000000..cb247825f3ec --- /dev/null +++ b/include/linux/mmiotrace.h @@ -0,0 +1,62 @@ +#ifndef MMIOTRACE_H +#define MMIOTRACE_H + +#include + +#define MMIO_VERSION 0x04 + +/* mm_io_header.type */ +#define MMIO_OPCODE_MASK 0xff +#define MMIO_OPCODE_SHIFT 0 +#define MMIO_WIDTH_MASK 0xff00 +#define MMIO_WIDTH_SHIFT 8 +#define MMIO_MAGIC (0x6f000000 | (MMIO_VERSION<<16)) +#define MMIO_MAGIC_MASK 0xffff0000 + +enum mm_io_opcode { /* payload type: */ + MMIO_READ = 0x1, /* struct mm_io_rw */ + MMIO_WRITE = 0x2, /* struct mm_io_rw */ + MMIO_PROBE = 0x3, /* struct mm_io_map */ + MMIO_UNPROBE = 0x4, /* struct mm_io_map */ + MMIO_MARKER = 0x5, /* raw char data */ + MMIO_UNKNOWN_OP = 0x6, /* struct mm_io_rw */ +}; + +struct mm_io_header { + __u32 type; + __u32 sec; /* timestamp */ + __u32 nsec; + __u32 pid; /* PID of the process, or 0 for kernel core */ + __u16 data_len; /* length of the following payload */ +}; + +struct mm_io_rw { + __u64 address; /* virtual address of register */ + __u64 value; + __u64 pc; /* optional program counter */ +}; + +struct mm_io_map { + __u64 phys; /* base address in PCI space */ + __u64 addr; /* base virtual address */ + __u64 len; /* mapping size */ + __u64 pc; /* optional program counter */ +}; + + +/* + * These structures are used to allow a single relay_write() + * call to write a full packet. + */ + +struct mm_io_header_rw { + struct mm_io_header header; + struct mm_io_rw rw; +} __attribute__((packed)); + +struct mm_io_header_map { + struct mm_io_header header; + struct mm_io_map map; +} __attribute__((packed)); + +#endif /* MMIOTRACE_H */ -- cgit v1.2.2 From 63ffa3e456c1a9884a3ebac997d91e3fdae18d78 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: x86 mmiotrace: comment about user space ABI Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mmiotrace.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index cb247825f3ec..6ec288f1fe24 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -3,6 +3,10 @@ #include +/* + * If you change anything here, you must bump MMIO_VERSION. + * This is the relay data format for user space. + */ #define MMIO_VERSION 0x04 /* mm_io_header.type */ @@ -23,7 +27,7 @@ enum mm_io_opcode { /* payload type: */ }; struct mm_io_header { - __u32 type; + __u32 type; /* see MMIO_* macros above */ __u32 sec; /* timestamp */ __u32 nsec; __u32 pid; /* PID of the process, or 0 for kernel core */ -- cgit v1.2.2 From 0fd0e3da4557c479b820b9a4a7afa25b4637ddf2 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: x86: mmiotrace full patch, preview 1 kmmio.c handles the list of mmio probes with callbacks, list of traced pages, and attaching into the page fault handler and die notifier. It arms, traps and disarms the given pages, this is the core of mmiotrace. mmio-mod.c is a user interface, hooking into ioremap functions and registering the mmio probes. It also decodes the required information from trapped mmio accesses via the pre and post callbacks in each probe. Currently, hooking into ioremap functions works by redefining the symbols of the target (binary) kernel module, so that it calls the traced versions of the functions. The most notable changes done since the last discussion are: - kmmio.c is a built-in, not part of the module - direct call from fault.c to kmmio.c, removing all dynamic hooks - prepare for unregistering probes at any time - make kmmio re-initializable and accessible to more than one user - rewrite kmmio locking to remove all spinlocks from page fault path Can I abuse call_rcu() like I do in kmmio.c:unregister_kmmio_probe() or is there a better way? The function called via call_rcu() itself calls call_rcu() again, will this work or break? There I need a second grace period for RCU after the first grace period for page faults. Mmiotrace itself (mmio-mod.c) is still a module, I am going to attack that next. At some point I will start looking into how to make mmiotrace a tracer component of ftrace (thanks for the hint, Ingo). Ftrace should make the user space part of mmiotracing as simple as 'cat /debug/trace/mmio > dump.txt'. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mmiotrace.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 6ec288f1fe24..d87a6cd8b686 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -3,6 +3,44 @@ #include +#ifdef __KERNEL__ + +#include + +struct kmmio_probe; +struct pt_regs; + +typedef void (*kmmio_pre_handler_t)(struct kmmio_probe *, + struct pt_regs *, unsigned long addr); +typedef void (*kmmio_post_handler_t)(struct kmmio_probe *, + unsigned long condition, struct pt_regs *); + +struct kmmio_probe { + struct list_head list; + unsigned long addr; /* start location of the probe point */ + unsigned long len; /* length of the probe region */ + kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */ + kmmio_post_handler_t post_handler; /* Called after addr is executed */ +}; + +/* kmmio is active by some kmmio_probes? */ +static inline int is_kmmio_active(void) +{ + extern unsigned int kmmio_count; + return kmmio_count; +} + +extern void reference_kmmio(void); +extern void unreference_kmmio(void); +extern int register_kmmio_probe(struct kmmio_probe *p); +extern void unregister_kmmio_probe(struct kmmio_probe *p); + +/* Called from page fault handler. */ +extern int kmmio_handler(struct pt_regs *regs, unsigned long addr); + +#endif /* __KERNEL__ */ + + /* * If you change anything here, you must bump MMIO_VERSION. * This is the relay data format for user space. -- cgit v1.2.2 From d61fc44853f46fb002228b18aa5f30db21fcd4ac Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: x86: mmiotrace, preview 2 Kconfig.debug, Makefile and testmmiotrace.c style fixes. Use real mutex instead of mutex. Fix failure path in register probe func. kmmio: RCU read-locked over single stepping. Generate mapping id's. Make mmio-mod.c built-in and rewrite its locking. Add debugfs file to enable/disable mmiotracing. kmmio: use irqsave spinlocks. Lots of cleanups in mmio-mod.c Marker file moved from /proc into debugfs. Call mmiotrace entrypoints directly from ioremap.c. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mmiotrace.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index d87a6cd8b686..cb5efd0c7f51 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -16,11 +16,12 @@ typedef void (*kmmio_post_handler_t)(struct kmmio_probe *, unsigned long condition, struct pt_regs *); struct kmmio_probe { - struct list_head list; + struct list_head list; /* kmmio internal list */ unsigned long addr; /* start location of the probe point */ unsigned long len; /* length of the probe region */ kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */ kmmio_post_handler_t post_handler; /* Called after addr is executed */ + void *user_data; }; /* kmmio is active by some kmmio_probes? */ @@ -38,6 +39,21 @@ extern void unregister_kmmio_probe(struct kmmio_probe *p); /* Called from page fault handler. */ extern int kmmio_handler(struct pt_regs *regs, unsigned long addr); +/* Called from ioremap.c */ +#ifdef CONFIG_MMIOTRACE +extern void +mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr); +extern void mmiotrace_iounmap(volatile void __iomem *addr); +#else +static inline void +mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr) +{ +} +static inline void mmiotrace_iounmap(volatile void __iomem *addr) +{ +} +#endif /* CONFIG_MMIOTRACE_HOOKS */ + #endif /* __KERNEL__ */ -- cgit v1.2.2 From f984b51e0779a6dd30feedc41404013ca54e5d05 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: ftrace: add mmiotrace plugin On Sat, 22 Mar 2008 13:07:47 +0100 Ingo Molnar wrote: > > > i'd suggest the following: pull x86.git and sched-devel.git into a > > > single tree [the two will combine without rejects]. Then try to add a > > > kernel/tracing/trace_mmiotrace.c ftrace plugin. The trace_sysprof.c > > > plugin might be a good example. > > > > I did this and now I have mmiotrace enabled/disabled via the tracing > > framework (what do we call this, since ftrace is one of the tracers?). > > cool! could you send the patches for that? (even if they are not fully > functional yet) Patch attached in the end. Nice to see how much code disappeared. I tried to mark all the features I had to break with XXX-comments. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mmiotrace.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index cb5efd0c7f51..579b3b06c90e 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -54,6 +54,12 @@ static inline void mmiotrace_iounmap(volatile void __iomem *addr) } #endif /* CONFIG_MMIOTRACE_HOOKS */ +/* in kernel/trace/trace_mmiotrace.c */ +extern int __init init_mmiotrace(void); +extern void enable_mmiotrace(void); +extern void disable_mmiotrace(void); +extern void mmio_trace_record(u32 type, unsigned long addr, unsigned long arg); + #endif /* __KERNEL__ */ -- cgit v1.2.2 From bd8ac686c73c7e925fcfe0b02dc4e7b947127864 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: ftrace: mmiotrace, updates here is a patch that makes mmiotrace work almost well within the tracing framework. The patch applies on top of my previous patch. I have my own output formatting in place now. Summary of changes: - fix the NULL dereference that was due to not calling tracing_reset() - add print_line() callback into struct tracer - implement print_line() for mmiotrace, producing up-to-spec text - add my output header, but that is not really called in the right place - rewrote the main structs in mmiotrace - added two new trace entry types: TRACE_MMIO_RW and TRACE_MMIO_MAP - made some functions in trace.c non-static - check current==NULL in tracing_generic_entry_update() - fix(?) comparison in trace_seq_printf() Things seem to work fine except a few issues. Markers (text lines injected into mmiotrace log) are missing, I did not feel hacking them in before we have variable length entries. My output header is printed only for 'trace' file, but not 'trace_pipe'. For some reason, despite my quick fix, iter->trace is NULL in print_trace_line() when called from 'trace_pipe' file, which means I don't get proper output formatting. I only tried by loading nouveau.ko, which just detects the card, and that is traced fine. I didn't try further. Map, two reads and unmap. Works perfectly. I am missing the information about overflows, I'd prefer to have a counter for lost events. I didn't try, but I guess currently there is no way of knowning when it overflows? So, not too far from being fully operational, it seems :-) And looking at the diffstat, there also is some 700-900 lines of user space code that just became obsolete. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mmiotrace.h | 85 ++++++++++++++--------------------------------- 1 file changed, 25 insertions(+), 60 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 579b3b06c90e..c88a9c197d22 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -54,73 +54,38 @@ static inline void mmiotrace_iounmap(volatile void __iomem *addr) } #endif /* CONFIG_MMIOTRACE_HOOKS */ -/* in kernel/trace/trace_mmiotrace.c */ -extern int __init init_mmiotrace(void); -extern void enable_mmiotrace(void); -extern void disable_mmiotrace(void); -extern void mmio_trace_record(u32 type, unsigned long addr, unsigned long arg); - -#endif /* __KERNEL__ */ - - -/* - * If you change anything here, you must bump MMIO_VERSION. - * This is the relay data format for user space. - */ -#define MMIO_VERSION 0x04 - -/* mm_io_header.type */ -#define MMIO_OPCODE_MASK 0xff -#define MMIO_OPCODE_SHIFT 0 -#define MMIO_WIDTH_MASK 0xff00 -#define MMIO_WIDTH_SHIFT 8 -#define MMIO_MAGIC (0x6f000000 | (MMIO_VERSION<<16)) -#define MMIO_MAGIC_MASK 0xffff0000 - -enum mm_io_opcode { /* payload type: */ - MMIO_READ = 0x1, /* struct mm_io_rw */ - MMIO_WRITE = 0x2, /* struct mm_io_rw */ - MMIO_PROBE = 0x3, /* struct mm_io_map */ - MMIO_UNPROBE = 0x4, /* struct mm_io_map */ +enum mm_io_opcode { + MMIO_READ = 0x1, /* struct mmiotrace_rw */ + MMIO_WRITE = 0x2, /* struct mmiotrace_rw */ + MMIO_PROBE = 0x3, /* struct mmiotrace_map */ + MMIO_UNPROBE = 0x4, /* struct mmiotrace_map */ MMIO_MARKER = 0x5, /* raw char data */ - MMIO_UNKNOWN_OP = 0x6, /* struct mm_io_rw */ + MMIO_UNKNOWN_OP = 0x6, /* struct mmiotrace_rw */ }; -struct mm_io_header { - __u32 type; /* see MMIO_* macros above */ - __u32 sec; /* timestamp */ - __u32 nsec; - __u32 pid; /* PID of the process, or 0 for kernel core */ - __u16 data_len; /* length of the following payload */ +struct mmiotrace_rw { + unsigned long phys; /* PCI address of register */ + unsigned long value; + unsigned long pc; /* optional program counter */ + int map_id; + unsigned char opcode; /* one of MMIO_{READ,WRITE,UNKNOWN_OP} */ + unsigned char width; /* size of register access in bytes */ }; -struct mm_io_rw { - __u64 address; /* virtual address of register */ - __u64 value; - __u64 pc; /* optional program counter */ +struct mmiotrace_map { + unsigned long phys; /* base address in PCI space */ + unsigned long virt; /* base virtual address */ + unsigned long len; /* mapping size */ + int map_id; + unsigned char opcode; /* MMIO_PROBE or MMIO_UNPROBE */ }; -struct mm_io_map { - __u64 phys; /* base address in PCI space */ - __u64 addr; /* base virtual address */ - __u64 len; /* mapping size */ - __u64 pc; /* optional program counter */ -}; - - -/* - * These structures are used to allow a single relay_write() - * call to write a full packet. - */ - -struct mm_io_header_rw { - struct mm_io_header header; - struct mm_io_rw rw; -} __attribute__((packed)); +/* in kernel/trace/trace_mmiotrace.c */ +extern void enable_mmiotrace(void); +extern void disable_mmiotrace(void); +extern void mmio_trace_rw(struct mmiotrace_rw *rw); +extern void mmio_trace_mapping(struct mmiotrace_map *map); -struct mm_io_header_map { - struct mm_io_header header; - struct mm_io_map map; -} __attribute__((packed)); +#endif /* __KERNEL__ */ #endif /* MMIOTRACE_H */ -- cgit v1.2.2 From 138295373ccf7625fcb0218dfea114837983bc39 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:58 +0200 Subject: ftrace: mmiotrace update, #2 another weekend, another patch. This should apply on top of my previous patch from March 23rd. Summary of changes: - Print PCI device list in output header - work around recursive probe hits on SMP - refactor dis/arm_kmmio_fault_page() and add check for page levels - remove un/reference_kmmio(), the die notifier hook is registered permanently into the list - explicitly check for single stepping in die notifier callback I have tested this version on my UP Athlon64 desktop with Nouveau, and SMP Core 2 Duo laptop with the proprietary nvidia driver. Both systems are 64-bit. One previously unknown bug crept into daylight: the ftrace framework's output routines print the first entry last after buffer has wrapped around. The most important regressions compared to non-ftrace mmiotrace at this time are: - failure of trace_pipe file - illegal lines in output file - unaware of losing data due to buffer full Personally I'd like to see these three solved before submitting to mainline. Other issues may come up once we know when we lose events. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mmiotrace.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index c88a9c197d22..dd6b64b160fc 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -31,8 +31,6 @@ static inline int is_kmmio_active(void) return kmmio_count; } -extern void reference_kmmio(void); -extern void unreference_kmmio(void); extern int register_kmmio_probe(struct kmmio_probe *p); extern void unregister_kmmio_probe(struct kmmio_probe *p); -- cgit v1.2.2 From 970e6fa03885f32cc43e42cb08c73a5f54cd8bd9 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:03 +0200 Subject: mmiotrace: code style cleanups From c2da03771e29159627c5c7b9509ec70bce9f91ee Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 28 Apr 2008 21:25:22 +0300 Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- include/linux/mmiotrace.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index dd6b64b160fc..de8e91258da7 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -1,9 +1,7 @@ #ifndef MMIOTRACE_H #define MMIOTRACE_H -#include - -#ifdef __KERNEL__ +#include #include @@ -84,6 +82,4 @@ extern void disable_mmiotrace(void); extern void mmio_trace_rw(struct mmiotrace_rw *rw); extern void mmio_trace_mapping(struct mmiotrace_map *map); -#endif /* __KERNEL__ */ - #endif /* MMIOTRACE_H */ -- cgit v1.2.2 From dee310d0adf41019aca476052ac3085ff286d9be Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:03 +0200 Subject: x86 mmiotrace: use resource_size_t for phys addresses Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- include/linux/mmiotrace.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index de8e91258da7..5cbbc374e945 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -2,7 +2,6 @@ #define MMIOTRACE_H #include - #include struct kmmio_probe; @@ -37,14 +36,15 @@ extern int kmmio_handler(struct pt_regs *regs, unsigned long addr); /* Called from ioremap.c */ #ifdef CONFIG_MMIOTRACE -extern void -mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr); +extern void mmiotrace_ioremap(resource_size_t offset, unsigned long size, + void __iomem *addr); extern void mmiotrace_iounmap(volatile void __iomem *addr); #else -static inline void -mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr) +static inline void mmiotrace_ioremap(resource_size_t offset, + unsigned long size, void __iomem *addr) { } + static inline void mmiotrace_iounmap(volatile void __iomem *addr) { } @@ -60,7 +60,7 @@ enum mm_io_opcode { }; struct mmiotrace_rw { - unsigned long phys; /* PCI address of register */ + resource_size_t phys; /* PCI address of register */ unsigned long value; unsigned long pc; /* optional program counter */ int map_id; @@ -69,7 +69,7 @@ struct mmiotrace_rw { }; struct mmiotrace_map { - unsigned long phys; /* base address in PCI space */ + resource_size_t phys; /* base address in PCI space */ unsigned long virt; /* base virtual address */ unsigned long len; /* mapping size */ int map_id; -- cgit v1.2.2 From a50445d76c22a34ae149704ea5adaef171c8acb7 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:03 +0200 Subject: mmiotrace: rename kmmio_probe::user_data to :private. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- include/linux/mmiotrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 5cbbc374e945..61d19e1b7a0b 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -18,7 +18,7 @@ struct kmmio_probe { unsigned long len; /* length of the probe region */ kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */ kmmio_post_handler_t post_handler; /* Called after addr is executed */ - void *user_data; + void *private; }; /* kmmio is active by some kmmio_probes? */ -- cgit v1.2.2 From 42fdfa238a23643226910acf922ea930b3286032 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 24 May 2008 23:14:51 +0200 Subject: namespacecheck: more kernel/printk.c fixes [ Stephen Rothwell : build fix ] Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/kernel.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 792bf0aa779b..f2a668c195bf 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -184,9 +184,6 @@ asmlinkage int vprintk(const char *fmt, va_list args) __attribute__ ((format (printf, 1, 0))); asmlinkage int printk(const char * fmt, ...) __attribute__ ((format (printf, 1, 2))) __cold; -extern int log_buf_get_len(void); -extern int log_buf_read(int idx); -extern int log_buf_copy(char *dest, int idx, int len); extern int printk_ratelimit_jiffies; extern int printk_ratelimit_burst; @@ -202,9 +199,6 @@ static inline int vprintk(const char *s, va_list args) { return 0; } static inline int printk(const char *s, ...) __attribute__ ((format (printf, 1, 2))); static inline int __cold printk(const char *s, ...) { return 0; } -static inline int log_buf_get_len(void) { return 0; } -static inline int log_buf_read(int idx) { return 0; } -static inline int log_buf_copy(char *dest, int idx, int len) { return 0; } static inline int printk_ratelimit(void) { return 0; } static inline int __printk_ratelimit(int ratelimit_jiffies, \ int ratelimit_burst) { return 0; } -- cgit v1.2.2 From 63687a528c39a67c1a213cdffa09feb0e6af9dbe Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 12 May 2008 15:44:41 +0200 Subject: x86: move tracedata to RODATA .. allowing it to be write-protected just as other read-only data under CONFIG_DEBUG_RODATA. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/resume-trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h index f3f4f28c6960..c9ba2fdf807d 100644 --- a/include/linux/resume-trace.h +++ b/include/linux/resume-trace.h @@ -8,7 +8,7 @@ extern int pm_trace_enabled; struct device; extern void set_trace_device(struct device *); -extern void generate_resume_trace(void *tracedata, unsigned int user); +extern void generate_resume_trace(const void *tracedata, unsigned int user); #define TRACE_DEVICE(dev) do { \ if (pm_trace_enabled) \ -- cgit v1.2.2 From 962cf36c5bf6d2840b8d66ee9a606fae2f540bbd Mon Sep 17 00:00:00 2001 From: "Carlos R. Mafra" Date: Thu, 15 May 2008 11:15:37 -0300 Subject: Remove argument from open_softirq which is always NULL As git-grep shows, open_softirq() is always called with the last argument being NULL block/blk-core.c: open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL); kernel/hrtimer.c: open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq, NULL); kernel/rcuclassic.c: open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL); kernel/rcupreempt.c: open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL); kernel/sched.c: open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL); kernel/softirq.c: open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL); kernel/softirq.c: open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL); kernel/timer.c: open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL); net/core/dev.c: open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); net/core/dev.c: open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL); This observation has already been made by Matthew Wilcox in June 2002 (http://www.cs.helsinki.fi/linux/linux-kernel/2002-25/0687.html) "I notice that none of the current softirq routines use the data element passed to them." and the situation hasn't changed since them. So it appears we can safely remove that extra argument to save 128 (54) bytes of kernel data (text). Signed-off-by: Carlos R. Mafra Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index f1fc7470d26c..a86186dd0474 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -285,12 +285,11 @@ enum struct softirq_action { void (*action)(struct softirq_action *); - void *data; }; asmlinkage void do_softirq(void); asmlinkage void __do_softirq(void); -extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data); +extern void open_softirq(int nr, void (*action)(struct softirq_action *)); extern void softirq_init(void); #define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) extern void raise_softirq_irqoff(unsigned int nr); -- cgit v1.2.2 From e9197bf0114661195bee35e7795cfc42164d9b2c Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 14 May 2008 08:15:10 -0700 Subject: x86 boot: remove some unused extern function declarations Remove three extern declarations for routines that don't exist. Fix a typo in a comment. Signed-off-by: Paul Jackson Signed-off-by: Ingo Molnar --- include/linux/efi.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index a5f359a7ad0e..807373d467f7 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -287,7 +287,6 @@ efi_guid_unparse(efi_guid_t *guid, char *out) extern void efi_init (void); extern void *efi_get_pal_addr (void); extern void efi_map_pal_code (void); -extern void efi_map_memmap(void); extern void efi_memmap_walk (efi_freemem_callback_t callback, void *arg); extern void efi_gettimeofday (struct timespec *ts); extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if possible */ @@ -295,14 +294,11 @@ extern u64 efi_get_iobase (void); extern u32 efi_mem_type (unsigned long phys_addr); extern u64 efi_mem_attributes (unsigned long phys_addr); extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size); -extern int efi_mem_attribute_range (unsigned long phys_addr, unsigned long size, - u64 attr); extern int __init efi_uart_console_only (void); extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); extern unsigned long efi_get_time(void); extern int efi_set_rtc_mmss(unsigned long nowtime); -extern int is_available_memory(efi_memory_desc_t * md); extern struct efi_memory_map memmap; /** -- cgit v1.2.2 From c801ed3860fe2f84281d4cae4c3e6ca87e81e890 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 14 May 2008 08:15:23 -0700 Subject: x86 boot: simplify pageblock_bits enum declaration The use of #defines with '##' pre-processor concatenation is a useful way to form several symbol names with a common pattern. But when there is just a single name obtained from that #define, it's just obfuscation. Better to just write the plain symbol name, as is. The following patch is a result of my wasting ten minutes looking through the kernel to figure out what 'PB_migrate_end' meant, and forgetting what I came to do, by the time I figured out that the #define PB_range macro defined it. Signed-off-by: Paul Jackson Signed-off-by: Ingo Molnar --- include/linux/pageblock-flags.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index e875905f7b12..e8c06122be36 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -25,13 +25,11 @@ #include -/* Macro to aid the definition of ranges of bits */ -#define PB_range(name, required_bits) \ - name, name ## _end = (name + required_bits) - 1 - /* Bit indices that affect a whole block of pages */ enum pageblock_bits { - PB_range(PB_migrate, 3), /* 3 bits required for migrate types */ + PB_migrate, + PB_migrate_end = PB_migrate + 3 - 1, + /* 3 bits required for migrate types */ NR_PAGEBLOCK_BITS }; -- cgit v1.2.2 From 41c52c0db9607e59f90da7da5309489fa06e887f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 22 May 2008 11:46:33 -0400 Subject: ftrace: set_ftrace_notrace feature While debugging latencies in the RT kernel, I found that it would be nice to be able to filter away functions from the trace than just to filter on functions. I added a new interface to the debugfs tracing directory called set_ftrace_notrace When dynamic frace is enabled, this lets you filter away functions that will not be recorded in the trace. It is similar to adding 'notrace' to those functions but by doing it without recompiling the kernel. Here's how set_ftrace_filter and set_ftrace_notrace interact. Remember, if set_ftrace_filter is set, it removes all functions from the trace execpt for those listed in the set_ftrace_filter. set_ftrace_notrace will prevent those functions from being traced. If you were to set one function in both set_ftrace_filter and set_ftrace_notrace and that function was the same, then you would end up with an empty trace. the set of functions to trace is: set_ftrace_filter == empty then all functions not in set_ftrace_notrace else set of the set_ftrace_filter and not in set of set_ftrace_notrace. Signed-off-by: Steven Rostedt Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 922e23d0196f..ffbbd54a720e 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -48,6 +48,7 @@ enum { FTRACE_FL_FAILED = (1 << 1), FTRACE_FL_FILTER = (1 << 2), FTRACE_FL_ENABLED = (1 << 3), + FTRACE_FL_NOTRACE = (1 << 4), }; struct dyn_ftrace { -- cgit v1.2.2 From 9e124fe16ff24746d6de5a2ad685266d7bce0e08 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Mon, 26 May 2008 23:31:07 +0100 Subject: xen: Enable console tty by default in domU if it's not a dummy Without console= arguments on the kernel command line, the first console to register becomes enabled and the preferred console (the one behind /dev/console). This is normally tty (assuming CONFIG_VT_CONSOLE is enabled, which it commonly is). This is okay as long tty is a useful console. But unless we have the PV framebuffer, and it is enabled for this domain, tty0 in domU is merely a dummy. In that case, we want the preferred console to be the Xen console hvc0, and we want it without having to fiddle with the kernel command line. Commit b8c2d3dfbc117dff26058fbac316b8acfc2cb5f7 did that for us. Since we now have the PV framebuffer, we want to enable and prefer tty again, but only when PVFB is enabled. But even then we still want to enable the Xen console as well. Problem: when tty registers, we can't yet know whether the PVFB is enabled. By the time we can know (xenstore is up), the console setup game is over. Solution: enable console tty by default, but keep hvc as the preferred console. Change the preferred console to tty when PVFB probes successfully, unless we've been given console kernel parameters. Signed-off-by: Markus Armbruster Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- include/linux/console.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index a4f27fbdf549..248e6e3b9b73 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -108,6 +108,8 @@ struct console { struct console *next; }; +extern int console_set_on_cmdline; + extern int add_preferred_console(char *name, int idx, char *options); extern int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options); extern void register_console(struct console *); -- cgit v1.2.2 From 0e91398f2a5d4eb6b07df8115917d0d1cf3e9b58 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:27 +0100 Subject: xen: implement save/restore This patch implements Xen save/restore and migration. Saving is triggered via xenbus, which is polled in drivers/xen/manage.c. When a suspend request comes in, the kernel prepares itself for saving by: 1 - Freeze all processes. This is primarily to prevent any partially-completed pagetable updates from confusing the suspend process. If CONFIG_PREEMPT isn't defined, then this isn't necessary. 2 - Suspend xenbus and other devices 3 - Stop_machine, to make sure all the other vcpus are quiescent. The Xen tools require the domain to run its save off vcpu0. 4 - Within the stop_machine state, it pins any unpinned pgds (under construction or destruction), performs canonicalizes various other pieces of state (mostly converting mfns to pfns), and finally 5 - Suspend the domain Restore reverses the steps used to save the domain, ending when all the frozen processes are thawed. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- include/linux/page-flags.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 590cff32415d..02955a1c3d76 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -157,6 +157,7 @@ PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active) __PAGEFLAG(Slab, slab) PAGEFLAG(Checked, owner_priv_1) /* Used by some filesystems */ PAGEFLAG(Pinned, owner_priv_1) TESTSCFLAG(Pinned, owner_priv_1) /* Xen */ +PAGEFLAG(SavePinned, dirty); /* Xen */ PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved) PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private) __SETPAGEFLAG(Private, private) -- cgit v1.2.2 From b1829d2705daa7cb72eb1e08bdc8b7e9fad34266 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 May 2008 01:22:08 +0200 Subject: ftrace: fix merge Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ffbbd54a720e..b482fe88bc04 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -122,7 +122,7 @@ static inline void tracer_disable(void) # define trace_preempt_off(a0, a1) do { } while (0) #endif -#ifdef CONFIG_CONTEXT_SWITCH_TRACER +#ifdef CONFIG_TRACING extern void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); #else -- cgit v1.2.2 From ad90c0e3ce8d20d6873b57e36181ef6d7a0097fe Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 27 May 2008 20:48:37 -0400 Subject: ftrace: user update and disable dynamic ftrace daemon In dynamic ftrace, the mcount function starts off pointing to a stub function that just returns. On start up, the call to the stub is modified to point to a "record_ip" function. The job of the record_ip function is to add the function to a pre-allocated hash list. If the function is already there, it simply is ignored, otherwise it is added to the list. Later, a ftraced daemon wakes up and calls kstop_machine if any functions have been recorded, and changes the calls to the recorded functions to a simple nop. If no functions were recorded, the daemon goes back to sleep. The daemon wakes up once a second to see if it needs to update any newly recorded functions into nops. Usually it does not, but if a lot of code has been executed for the first time in the kernel, the ftraced daemon will call kstop_machine to update those into nops. The problem currently is that there's no way to stop the daemon from doing this, and it can cause unneeded latencies (800us which for some is bothersome). This patch adds a new file /debugfs/tracing/ftraced_enabled. If the daemon is active, reading this will return "enabled\n" and "disabled\n" when the daemon is not running. To disable the daemon, the user can echo "0" or "disable" into this file, and "1" or "enable" to re-enable the daemon. Since the daemon is used to convert the functions into nops to increase the performance of the system, I also added that anytime something is written into the ftraced_enabled file, kstop_machine will run if there are new functions that have been detected that need to be converted. This way the user can disable the daemon but still be able to control the conversion of the mcount calls to nops by simply, "echo 0 > /debugfs/tracing/ftraced_enabled" when they need to do more conversions. To see the number of converted functions: "cat /debugfs/tracing/dyn_ftrace_total_info" Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b482fe88bc04..623819433ed5 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -72,9 +72,15 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); + +void ftrace_disable_daemon(void); +void ftrace_enable_daemon(void); + #else # define ftrace_force_update() ({ 0; }) # define ftrace_set_filter(buf, len, reset) do { } while (0) +# define ftrace_disable_daemon() do { } while (0) +# define ftrace_enable_daemon() do { } while (0) #endif /* totally disable ftrace - can not re-enable after this */ -- cgit v1.2.2 From 18404756765c713a0be4eb1082920c04822ce588 Mon Sep 17 00:00:00 2001 From: Max Krasnyansky Date: Thu, 29 May 2008 11:02:52 -0700 Subject: genirq: Expose default irq affinity mask (take 3) Current IRQ affinity interface does not provide a way to set affinity for the IRQs that will be allocated/activated in the future. This patch creates /proc/irq/default_smp_affinity that lets users set default affinity mask for the newly allocated IRQs. Changing the default does not affect affinity masks for the currently active IRQs, they have to be changed explicitly. Updated based on Paul J's comments and added some more documentation. Signed-off-by: Max Krasnyansky Cc: pj@sgi.com Cc: a.p.zijlstra@chello.nl Cc: tglx@linutronix.de Cc: rdunlap@xenotime.net Cc: mingo@elte.hu Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 5 +++++ include/linux/irq.h | 9 --------- 2 files changed, 5 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index f1fc7470d26c..043400f3d458 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -104,8 +104,11 @@ extern void enable_irq(unsigned int irq); #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS) +extern cpumask_t irq_default_affinity; + extern int irq_set_affinity(unsigned int irq, cpumask_t cpumask); extern int irq_can_set_affinity(unsigned int irq); +extern int irq_select_affinity(unsigned int irq); #else /* CONFIG_SMP */ @@ -119,6 +122,8 @@ static inline int irq_can_set_affinity(unsigned int irq) return 0; } +static inline int irq_select_affinity(unsigned int irq) { return 0; } + #endif /* CONFIG_SMP && CONFIG_GENERIC_HARDIRQS */ #ifdef CONFIG_GENERIC_HARDIRQS diff --git a/include/linux/irq.h b/include/linux/irq.h index 552e0ec269c9..8ccb462ea42c 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -244,15 +244,6 @@ static inline void set_balance_irq_affinity(unsigned int irq, cpumask_t mask) } #endif -#ifdef CONFIG_AUTO_IRQ_AFFINITY -extern int select_smp_affinity(unsigned int irq); -#else -static inline int select_smp_affinity(unsigned int irq) -{ - return 1; -} -#endif - extern int no_irq_affinity; static inline int irq_balancing_disabled(unsigned int irq) -- cgit v1.2.2 From 554ec22f075d46e4363520a407d2b7eeb5dfdd43 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:21:03 +0200 Subject: namespacecheck: more sched.c fixes [ Stephen Rothwell : build fix ] Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ae0be3c62375..dc36c3aea018 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -134,7 +134,6 @@ extern unsigned long nr_running(void); extern unsigned long nr_uninterruptible(void); extern unsigned long nr_active(void); extern unsigned long nr_iowait(void); -extern unsigned long weighted_cpuload(const int cpu); struct seq_file; struct cfs_rq; @@ -823,23 +822,6 @@ extern int arch_reinit_sched_domains(void); #endif /* CONFIG_SMP */ -/* - * A runqueue laden with a single nice 0 task scores a weighted_cpuload of - * SCHED_LOAD_SCALE. This function returns 1 if any cpu is laden with a - * task of nice 0 or enough lower priority tasks to bring up the - * weighted_cpuload - */ -static inline int above_background_load(void) -{ - unsigned long cpu; - - for_each_online_cpu(cpu) { - if (weighted_cpuload(cpu) >= SCHED_LOAD_SCALE) - return 1; - } - return 0; -} - struct io_context; /* See blkdev.h */ #define NGROUPS_SMALL 32 #define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t))) -- cgit v1.2.2 From c7aceaba042702538b23cf4e0de1b2891ad8e671 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Thu, 15 May 2008 12:09:15 +0100 Subject: sched: reorder task_struct to reduce padding on 64bit builds This patch removes 24 bytes of padding and allows 1 extra object per slab on my fedora based config. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index dc36c3aea018..ea2857b99596 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1021,6 +1021,7 @@ struct task_struct { #endif int prio, static_prio, normal_prio; + unsigned int rt_priority; const struct sched_class *sched_class; struct sched_entity se; struct sched_rt_entity rt; @@ -1104,7 +1105,6 @@ struct task_struct { int __user *set_child_tid; /* CLONE_CHILD_SETTID */ int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ - unsigned int rt_priority; cputime_t utime, stime, utimescaled, stimescaled; cputime_t gtime; cputime_t prev_utime, prev_stime; @@ -1123,12 +1123,12 @@ struct task_struct { gid_t gid,egid,sgid,fsgid; struct group_info *group_info; kernel_cap_t cap_effective, cap_inheritable, cap_permitted, cap_bset; - unsigned securebits; struct user_struct *user; + unsigned securebits; #ifdef CONFIG_KEYS + unsigned char jit_keyring; /* default keyring to attach requested keys to */ struct key *request_key_auth; /* assumed request_key authority */ struct key *thread_keyring; /* keyring private to this thread */ - unsigned char jit_keyring; /* default keyring to attach requested keys to */ #endif char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock @@ -1215,8 +1215,8 @@ struct task_struct { # define MAX_LOCK_DEPTH 48UL u64 curr_chain_key; int lockdep_depth; - struct held_lock held_locks[MAX_LOCK_DEPTH]; unsigned int lockdep_recursion; + struct held_lock held_locks[MAX_LOCK_DEPTH]; #endif /* journalling filesystem info */ @@ -1244,10 +1244,6 @@ struct task_struct { u64 acct_vm_mem1; /* accumulated virtual memory usage */ cputime_t acct_stimexpd;/* stime since last update */ #endif -#ifdef CONFIG_NUMA - struct mempolicy *mempolicy; - short il_next; -#endif #ifdef CONFIG_CPUSETS nodemask_t mems_allowed; int cpuset_mems_generation; @@ -1266,6 +1262,10 @@ struct task_struct { #endif struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; +#endif +#ifdef CONFIG_NUMA + struct mempolicy *mempolicy; + short il_next; #endif atomic_t fs_excl; /* holding fs exclusive resources */ struct rcu_head rcu; -- cgit v1.2.2 From 1f11eb6a8bc92536d9e93ead48fa3ffbd1478571 Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Wed, 4 Jun 2008 15:04:05 -0400 Subject: sched: fix cpupri hotplug support The RT folks over at RedHat found an issue w.r.t. hotplug support which was traced to problems with the cpupri infrastructure in the scheduler: https://bugzilla.redhat.com/show_bug.cgi?id=449676 This bug affects 23-rt12+, 24-rtX, 25-rtX, and sched-devel. This patch applies to 25.4-rt4, though it should trivially apply to most cpupri enabled kernels mentioned above. It turned out that the issue was that offline cpus could get inadvertently registered with cpupri so that they were erroneously selected during migration decisions. The end result would be an OOPS as the offline cpu had tasks routed to it. This patch generalizes the old join/leave domain interface into an online/offline interface, and adjusts the root-domain/hotplug code to utilize it. I was able to easily reproduce the issue prior to this patch, and am no longer able to reproduce it after this patch. I can offline cpus indefinately and everything seems to be in working order. Thanks to Arnaldo (acme), Thomas, and Peter for doing the legwork to point me in the right direction. Also thank you to Peter for reviewing the early iterations of this patch. Signed-off-by: Gregory Haskins Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Arnaldo Carvalho de Melo Cc: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ea2857b99596..d25acf600a32 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -903,8 +903,8 @@ struct sched_class { void (*set_cpus_allowed)(struct task_struct *p, const cpumask_t *newmask); - void (*join_domain)(struct rq *rq); - void (*leave_domain)(struct rq *rq); + void (*rq_online)(struct rq *rq); + void (*rq_offline)(struct rq *rq); void (*switched_from) (struct rq *this_rq, struct task_struct *task, int running); -- cgit v1.2.2 From cc1a9d86ce989083703c4bdc11b75a87e1cc404a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 8 Jun 2008 19:39:16 -0700 Subject: mm, x86: shrink_active_range() should check all Now we are using register_e820_active_regions() instead of add_active_range() directly. So end_pfn could be different between the value in early_node_map to node_end_pfn. So we need to make shrink_active_range() smarter. shrink_active_range() is a generic MM function in mm/page_alloc.c but it is only used on 32-bit x86. Should we move it back to some file in arch/x86? Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/mm.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index c31a9cd2a30e..7cbd949f2516 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -997,8 +997,7 @@ extern void free_area_init_node(int nid, pg_data_t *pgdat, extern void free_area_init_nodes(unsigned long *max_zone_pfn); extern void add_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); -extern void shrink_active_range(unsigned int nid, unsigned long old_end_pfn, - unsigned long new_end_pfn); +extern void shrink_active_range(unsigned int nid, unsigned long new_end_pfn); extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); extern void remove_all_active_ranges(void); -- cgit v1.2.2 From 0eb967012ea15e6e8cfab483d9fa37bc602d400c Mon Sep 17 00:00:00 2001 From: Abhishek Sagar Date: Sun, 1 Jun 2008 21:47:30 +0530 Subject: ftrace: prevent freeing of all failed updates Prevent freeing of records which cause problems and correspond to function from core kernel text. A new flag, FTRACE_FL_CONVERTED is used to mark a record as "converted". All other records are patched lazily to NOPs. Failed records now also remain on frace_hash table. Each invocation of ftrace_record_ip now checks whether the traced function has ever been recorded (including past failures) and doesn't re-record it again. Signed-off-by: Abhishek Sagar Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 623819433ed5..20e14d0093c7 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -49,6 +49,7 @@ enum { FTRACE_FL_FILTER = (1 << 2), FTRACE_FL_ENABLED = (1 << 3), FTRACE_FL_NOTRACE = (1 << 4), + FTRACE_FL_CONVERTED = (1 << 5), }; struct dyn_ftrace { -- cgit v1.2.2 From 9985b0bab332289f14837eff3c6e0bcc658b58f7 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 5 Jun 2008 12:57:11 -0700 Subject: sched: prevent bound kthreads from changing cpus_allowed Kthreads that have called kthread_bind() are bound to specific cpus, so other tasks should not be able to change their cpus_allowed from under them. Otherwise, it is possible to move kthreads, such as the migration or software watchdog threads, so they are not allowed access to the cpu they work on. Cc: Peter Zijlstra Cc: Paul Menage Cc: Paul Jackson Signed-off-by: David Rientjes Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d25acf600a32..2db1485f865d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1486,6 +1486,7 @@ static inline void put_task_struct(struct task_struct *t) #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ +#define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */ #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ -- cgit v1.2.2 From c50cbb05a05cf1f9ca3592272eff053c847727d8 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 4 Jun 2008 21:47:29 -0700 Subject: cpu topology: always define CPU topology information This can result in an empty topology directory in sysfs, and requires in-kernel users to protect all uses with #ifdef - see . The documentation of CPU topology specifies what the defaults should be if only partial information is available from the hardware. So we can provide these defaults as a fallback. This patch: - Adds default definitions of the 4 topology macros to - Changes drivers/base/topology.c to use the topology macros unconditionally and to cope with definitions that aren't lvalues - Updates documentation accordingly [ From: Andrew Morton - fold now-duplicated code - fix layout ] Signed-off-by: Ben Hutchings Cc: Vegard Nossum Cc: Nick Piggin Cc: Chandra Seetharaman Cc: Suresh Siddha Cc: Mike Travis Cc: Christoph Lameter Cc: John Hawkes Cc: Zhang, Yanmin Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/topology.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index 24f3d2282e11..2158fc0d5a56 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -179,4 +179,17 @@ void arch_update_cpu_topology(void); #endif #endif /* CONFIG_NUMA */ +#ifndef topology_physical_package_id +#define topology_physical_package_id(cpu) ((void)(cpu), -1) +#endif +#ifndef topology_core_id +#define topology_core_id(cpu) ((void)(cpu), 0) +#endif +#ifndef topology_thread_siblings +#define topology_thread_siblings(cpu) cpumask_of_cpu(cpu) +#endif +#ifndef topology_core_siblings +#define topology_core_siblings(cpu) cpumask_of_cpu(cpu) +#endif + #endif /* _LINUX_TOPOLOGY_H */ -- cgit v1.2.2 From e17ba73b0ee6c0f24393c48b455e0d8db761782c Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 12 May 2008 15:44:40 +0200 Subject: x86, generic: mark early_printk as asmlinkage It's not explicitly marked as asmlinkage, but invoked from x86_32 startup code with parameters on stack. No other architectures define early_printk and none of them are affected by this change, since defines asmlinkage as empty token. Signed-off-by: Jiri Slaby Cc: H. Peter Anvin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index f2a668c195bf..4cb8d3df414e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -207,7 +207,7 @@ static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \ { return false; } #endif -extern void __attribute__((format(printf, 1, 2))) +extern void asmlinkage __attribute__((format(printf, 1, 2))) early_printk(const char *fmt, ...); unsigned long int_sqrt(unsigned long); -- cgit v1.2.2 From 443cd507ce7f78c6f8742b72736585c031d5a921 Mon Sep 17 00:00:00 2001 From: "Huang, Ying" Date: Fri, 20 Jun 2008 16:39:21 +0800 Subject: lockdep: add lock_class information to lock_chain and output it This patch records array of lock_class into lock_chain, and export lock_chain information via /proc/lockdep_chains. It is based on x86/master branch of git-x86 tree, and has been tested on x86_64 platform. Signed-off-by: Huang Ying Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 4c4d236ded18..b26fbc715a50 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -182,6 +182,9 @@ struct lock_list { * We record lock dependency chains, so that we can cache them: */ struct lock_chain { + u8 irq_context; + u8 depth; + u16 base; struct list_head entry; u64 chain_key; }; -- cgit v1.2.2 From 0b2806768899dba5967bcd4a3b93eaed9a1dc4f3 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Sun, 18 May 2008 14:27:41 -0600 Subject: Add cycle_kernel_lock() A number of driver functions are so obviously trivial that they do not need the big kernel lock - at least not overtly. It turns out that the acquisition of the BKL in driver open() functions can perform a sort of poor-hacker's serialization function, delaying the open operation until the driver is certain to have completed its initialization. Add a simple cycle_kernel_lock() function for these cases to make it clear that there is no need to *hold* the BKL, just to be sure that we can acquire it. Signed-off-by: Jonathan Corbet --- include/linux/smp_lock.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h index aab3a4cff4e1..813be59bf345 100644 --- a/include/linux/smp_lock.h +++ b/include/linux/smp_lock.h @@ -27,11 +27,24 @@ static inline int reacquire_kernel_lock(struct task_struct *task) extern void __lockfunc lock_kernel(void) __acquires(kernel_lock); extern void __lockfunc unlock_kernel(void) __releases(kernel_lock); +/* + * Various legacy drivers don't really need the BKL in a specific + * function, but they *do* need to know that the BKL became available. + * This function just avoids wrapping a bunch of lock/unlock pairs + * around code which doesn't really need it. + */ +static inline void cycle_kernel_lock(void) +{ + lock_kernel(); + unlock_kernel(); +} + #else #define lock_kernel() do { } while(0) #define unlock_kernel() do { } while(0) #define release_kernel_lock(task) do { } while(0) +#define cycle_kernel_lock() do { } while(0) #define reacquire_kernel_lock(task) 0 #define kernel_locked() 1 -- cgit v1.2.2 From 395a59d0f8e86bb39cd700c3d185d30c670bb958 Mon Sep 17 00:00:00 2001 From: Abhishek Sagar Date: Sat, 21 Jun 2008 23:47:27 +0530 Subject: ftrace: store mcount address in rec->ip Record the address of the mcount call-site. Currently all archs except sparc64 record the address of the instruction following the mcount call-site. Some general cleanups are entailed. Storing mcount addresses in rec->ip enables looking them up in the kprobe hash table later on to check if they're kprobe'd. Signed-off-by: Abhishek Sagar Cc: davem@davemloft.net Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 20e14d0093c7..366098d591de 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -31,7 +31,6 @@ int unregister_ftrace_function(struct ftrace_ops *ops); void clear_ftrace_function(void); extern void ftrace_stub(unsigned long a0, unsigned long a1); -extern void mcount(void); #else /* !CONFIG_FTRACE */ # define register_ftrace_function(ops) do { } while (0) @@ -54,7 +53,7 @@ enum { struct dyn_ftrace { struct hlist_node node; - unsigned long ip; + unsigned long ip; /* address of mcount call-site */ unsigned long flags; }; -- cgit v1.2.2 From 785656a41f9a9c0e843a23d1ae05d900b5158f8f Mon Sep 17 00:00:00 2001 From: Abhishek Sagar Date: Sat, 21 Jun 2008 23:47:39 +0530 Subject: kprobes: enable clean usage of get_kprobe Allow clean use of get_kprobe() outside of core kprobe code. Ftrace makes use of get_kprobe to identify probes installed on mcount call-sites. Signed-off-by: Abhishek Sagar Acked-by: Ananth N Mavinakayanahalli Cc: Masami Hiramatsu Cc: jkenisto@us.ibm.com Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/kprobes.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 1036631ff4fa..04a3556bdea6 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -259,6 +259,10 @@ void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head); struct jprobe; struct kretprobe; +static inline struct kprobe *get_kprobe(void *addr) +{ + return NULL; +} static inline struct kprobe *kprobe_running(void) { return NULL; -- cgit v1.2.2 From ecea656d1d5e912d2f3d332657ea4a6d8380f891 Mon Sep 17 00:00:00 2001 From: Abhishek Sagar Date: Sat, 21 Jun 2008 23:47:53 +0530 Subject: ftrace: freeze kprobe'd records Let records identified as being kprobe'd be marked as "frozen". The trouble with records which have a kprobe installed on their mcount call-site is that they don't get updated. So if such a function which is currently being traced gets its tracing disabled due to a new filter rule (or because it was added to the notrace list) then it won't be updated and continue being traced. This patch allows scanning of all frozen records during tracing to check if they should be traced. Signed-off-by: Abhishek Sagar Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 366098d591de..3121b95443d9 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -49,6 +49,7 @@ enum { FTRACE_FL_ENABLED = (1 << 3), FTRACE_FL_NOTRACE = (1 << 4), FTRACE_FL_CONVERTED = (1 << 5), + FTRACE_FL_FROZEN = (1 << 6), }; struct dyn_ftrace { @@ -73,15 +74,18 @@ extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); +extern int skip_trace(unsigned long ip); + void ftrace_disable_daemon(void); void ftrace_enable_daemon(void); #else +# define skip_trace(ip) ({ 0; }) # define ftrace_force_update() ({ 0; }) # define ftrace_set_filter(buf, len, reset) do { } while (0) # define ftrace_disable_daemon() do { } while (0) # define ftrace_enable_daemon() do { } while (0) -#endif +#endif /* CONFIG_DYNAMIC_FTRACE */ /* totally disable ftrace - can not re-enable after this */ void ftrace_kill(void); -- cgit v1.2.2 From 3da757daf86e498872855f0b5e101f763ba79499 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Fri, 20 Jun 2008 15:06:33 -0700 Subject: x86: use cpu_khz for loops_per_jiffy calculation On the x86 platform we can use the value of tsc_khz computed during tsc calibration to calculate the loops_per_jiffy value. Its very important to keep the error in lpj values to minimum as any error in that may result in kernel panic in check_timer. In virtualization environment, On a highly overloaded host the guest delay calibration may sometimes result in errors beyond the ~50% that timer_irq_works can handle, resulting in the guest panicking. Does some formating changes to lpj_setup code to now have a single printk to print the bogomips value. We do this only for the boot processor because the AP's can have different base frequencies or the BIOS might boot a AP at a different frequency. Signed-off-by: Alok N Kataria Cc: Arjan van de Ven Cc: Daniel Hecht Cc: Tim Mann Cc: Zach Amsden Cc: Sahil Rihan Signed-off-by: Ingo Molnar --- include/linux/delay.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/delay.h b/include/linux/delay.h index 54552d21296e..01aec60590ab 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -41,6 +41,7 @@ static inline void ndelay(unsigned long x) #define ndelay(x) ndelay(x) #endif +extern unsigned long lpj_tsc; void calibrate_delay(void); void msleep(unsigned int msecs); unsigned long msleep_interruptible(unsigned int msecs); -- cgit v1.2.2 From 961ccddd59d627b89bd3dc284b6517833bbdf25d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 23 Jun 2008 13:55:38 +1000 Subject: sched: add new API sched_setscheduler_nocheck: add a flag to control access checks Hidehiro Kawai noticed that sched_setscheduler() can fail in stop_machine: it calls sched_setscheduler() from insmod, which can have CAP_SYS_MODULE without CAP_SYS_NICE. Two cases could have failed, so are changed to sched_setscheduler_nocheck: kernel/softirq.c:cpu_callback() - CPU hotplug callback kernel/stop_machine.c:__stop_machine_run() - Called from various places, including modprobe() Signed-off-by: Rusty Russell Cc: Jeremy Fitzhardinge Cc: Hidehiro Kawai Cc: Andrew Morton Cc: linux-mm@kvack.org Cc: sugita Cc: Satoshi OSHIMA Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index c5d3f847ca8d..fe3b9b5d7390 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1655,6 +1655,8 @@ extern int can_nice(const struct task_struct *p, const int nice); extern int task_curr(const struct task_struct *p); extern int idle_cpu(int cpu); extern int sched_setscheduler(struct task_struct *, int, struct sched_param *); +extern int sched_setscheduler_nocheck(struct task_struct *, int, + struct sched_param *); extern struct task_struct *idle_task(int cpu); extern struct task_struct *curr_task(int cpu); extern void set_curr_task(int cpu, struct task_struct *p); -- cgit v1.2.2 From a033c332e047397904ed74816946b2edd9b0d5cd Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 23 Jun 2008 10:52:42 +0800 Subject: lockdep: remove duplicate definition of STATIC_LOCKDEP_MAP_INIT STATIC_LOCKDEP_MAP_INIT is defined twice in lockdep.h. I guess it's a copy & paste. Signed-off-by: Li Zefan Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index b26fbc715a50..2486eb4edbf1 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -278,14 +278,6 @@ extern void lockdep_init_map(struct lockdep_map *lock, const char *name, lockdep_init_map(&(lock)->dep_map, #lock, \ (lock)->dep_map.key, sub) -/* - * To initialize a lockdep_map statically use this macro. - * Note that _name must not be NULL. - */ -#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ - { .name = (_name), .key = (void *)(_key), } - - /* * Acquire a lock. * -- cgit v1.2.2 From f3f3149f35b9195ef4b761b1353fc0766b5f53be Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Mon, 23 Jun 2008 18:21:56 -0700 Subject: x86: use cpu_khz for loops_per_jiffy calculation, cleanup As suggested by Ingo, remove all references to tsc from init/calibrate.c TSC is x86 specific, and using tsc in variable names in a generic file should be avoided. lpj_tsc is now called lpj_fine, since it is related to fine tuning of lpj value. Also tsc_rate_* is called timer_rate_* Signed-off-by: Alok N Kataria Cc: Arjan van de Ven Cc: Daniel Hecht Cc: Tim Mann Cc: Zach Amsden Cc: Sahil Rihan Signed-off-by: Ingo Molnar --- include/linux/delay.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/delay.h b/include/linux/delay.h index 01aec60590ab..fd832c6d419e 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -41,7 +41,7 @@ static inline void ndelay(unsigned long x) #define ndelay(x) ndelay(x) #endif -extern unsigned long lpj_tsc; +extern unsigned long lpj_fine; void calibrate_delay(void); void msleep(unsigned int msecs); unsigned long msleep_interruptible(unsigned int msecs); -- cgit v1.2.2 From 3d4422332711ef48ef0f132f1fcbfcbd56c7f3d1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 26 Jun 2008 11:21:34 +0200 Subject: Add generic helpers for arch IPI function calls This adds kernel/smp.c which contains helpers for IPI function calls. In addition to supporting the existing smp_call_function() in a more efficient manner, it also adds a more scalable variant called smp_call_function_single() for calling a given function on a single CPU only. The core of this is based on the x86-64 patch from Nick Piggin, lots of changes since then. "Alan D. Brunelle" has contributed lots of fixes and suggestions as well. Also thanks to Paul E. McKenney for reviewing RCU usage and getting rid of the data allocation fallback deadlock. Acked-by: Ingo Molnar Reviewed-by: Paul E. McKenney Signed-off-by: Jens Axboe --- include/linux/smp.h | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 55232ccf9cfd..eac3e062250f 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -7,9 +7,19 @@ */ #include +#include +#include +#include extern void cpu_idle(void); +struct call_single_data { + struct list_head list; + void (*func) (void *info); + void *info; + unsigned int flags; +}; + #ifdef CONFIG_SMP #include @@ -53,9 +63,28 @@ extern void smp_cpus_done(unsigned int max_cpus); * Call a function on all other processors */ int smp_call_function(void(*func)(void *info), void *info, int retry, int wait); - +int smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info, + int wait); int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, int retry, int wait); +void __smp_call_function_single(int cpuid, struct call_single_data *data); + +/* + * Generic and arch helpers + */ +#ifdef CONFIG_USE_GENERIC_SMP_HELPERS +void generic_smp_call_function_single_interrupt(void); +void generic_smp_call_function_interrupt(void); +void init_call_single_data(void); +void ipi_call_lock(void); +void ipi_call_unlock(void); +void ipi_call_lock_irq(void); +void ipi_call_unlock_irq(void); +#else +static inline void init_call_single_data(void) +{ +} +#endif /* * Call a function on all processors @@ -112,7 +141,9 @@ static inline void smp_send_reschedule(int cpu) { } }) #define smp_call_function_mask(mask, func, info, wait) \ (up_smp_call_function(func, info)) - +static inline void init_call_single_data(void) +{ +} #endif /* !SMP */ /* -- cgit v1.2.2 From 8691e5a8f691cc2a4fda0651e8d307aaba0e7d68 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 6 Jun 2008 11:18:06 +0200 Subject: smp_call_function: get rid of the unused nonatomic/retry argument It's never used and the comments refer to nonatomic and retry interchangably. So get rid of it. Acked-by: Jeremy Fitzhardinge Signed-off-by: Jens Axboe --- include/linux/smp.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index eac3e062250f..338cad1b9548 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -62,11 +62,11 @@ extern void smp_cpus_done(unsigned int max_cpus); /* * Call a function on all other processors */ -int smp_call_function(void(*func)(void *info), void *info, int retry, int wait); +int smp_call_function(void(*func)(void *info), void *info, int wait); int smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info, int wait); int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, - int retry, int wait); + int wait); void __smp_call_function_single(int cpuid, struct call_single_data *data); /* @@ -119,7 +119,7 @@ static inline int up_smp_call_function(void (*func)(void *), void *info) { return 0; } -#define smp_call_function(func, info, retry, wait) \ +#define smp_call_function(func, info, wait) \ (up_smp_call_function(func, info)) #define on_each_cpu(func,info,retry,wait) \ ({ \ @@ -131,7 +131,7 @@ static inline int up_smp_call_function(void (*func)(void *), void *info) static inline void smp_send_reschedule(int cpu) { } #define num_booting_cpus() 1 #define smp_prepare_boot_cpu() do {} while (0) -#define smp_call_function_single(cpuid, func, info, retry, wait) \ +#define smp_call_function_single(cpuid, func, info, wait) \ ({ \ WARN_ON(cpuid != 0); \ local_irq_disable(); \ -- cgit v1.2.2 From 15c8b6c1aaaf1c4edd67e2f02e4d8e1bd1a51c0d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 May 2008 09:39:44 +0200 Subject: on_each_cpu(): kill unused 'retry' parameter It's not even passed on to smp_call_function() anymore, since that was removed. So kill it. Acked-by: Jeremy Fitzhardinge Reviewed-by: Paul E. McKenney Signed-off-by: Jens Axboe --- include/linux/smp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 338cad1b9548..55261101d09a 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -89,7 +89,7 @@ static inline void init_call_single_data(void) /* * Call a function on all processors */ -int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait); +int on_each_cpu(void (*func) (void *info), void *info, int wait); #define MSG_ALL_BUT_SELF 0x8000 /* Assume <32768 CPU's */ #define MSG_ALL 0x8001 @@ -121,7 +121,7 @@ static inline int up_smp_call_function(void (*func)(void *), void *info) } #define smp_call_function(func, info, wait) \ (up_smp_call_function(func, info)) -#define on_each_cpu(func,info,retry,wait) \ +#define on_each_cpu(func,info,wait) \ ({ \ local_irq_disable(); \ func(info); \ -- cgit v1.2.2 From 1bdad606338debc6384b2844f1b53cc436b3ac90 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 3 Jun 2008 14:09:53 +0100 Subject: [GFS2] Remove remote lock dropping code There are several reasons why this is undesirable: 1. It never happens during normal operation anyway 2. If it does happen it causes performance to be very, very poor 3. It isn't likely to solve the original problem (memory shortage on remote DLM node) it was supposed to solve 4. It uses a bunch of arbitrary constants which are unlikely to be correct for any particular situation and for which the tuning seems to be a black art. 5. In an N node cluster, only 1/N of the dropped locked will actually contribute to solving the problem on average. So all in all we are better off without it. This also makes merging the lock_dlm module into GFS2 a bit easier. Signed-off-by: Steven Whitehouse --- include/linux/lm_interface.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lm_interface.h b/include/linux/lm_interface.h index f274997bc283..d0a7112b9719 100644 --- a/include/linux/lm_interface.h +++ b/include/linux/lm_interface.h @@ -138,9 +138,6 @@ typedef void (*lm_callback_t) (void *ptr, unsigned int type, void *data); * LM_CB_NEED_RECOVERY * The given journal needs to be recovered. * - * LM_CB_DROPLOCKS - * Reduce the number of cached locks. - * * LM_CB_ASYNC * The given lock has been granted. */ @@ -149,7 +146,6 @@ typedef void (*lm_callback_t) (void *ptr, unsigned int type, void *data); #define LM_CB_NEED_D 258 #define LM_CB_NEED_S 259 #define LM_CB_NEED_RECOVERY 260 -#define LM_CB_DROPLOCKS 261 #define LM_CB_ASYNC 262 /* -- cgit v1.2.2 From b2cad26cfc2091050574a460b304ed103a35dbda Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 3 Jun 2008 14:34:14 +0100 Subject: [GFS2] Remove obsolete conversion deadlock avoidance code This is only used by GFS1 so can be removed. Signed-off-by: Steven Whitehouse --- include/linux/lm_interface.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lm_interface.h b/include/linux/lm_interface.h index d0a7112b9719..2ed8fa1b762b 100644 --- a/include/linux/lm_interface.h +++ b/include/linux/lm_interface.h @@ -122,11 +122,9 @@ typedef void (*lm_callback_t) (void *ptr, unsigned int type, void *data); */ #define LM_OUT_ST_MASK 0x00000003 -#define LM_OUT_CACHEABLE 0x00000004 #define LM_OUT_CANCELED 0x00000008 #define LM_OUT_ASYNC 0x00000080 #define LM_OUT_ERROR 0x00000100 -#define LM_OUT_CONV_DEADLK 0x00000200 /* * lm_callback_t types -- cgit v1.2.2 From c09595f63bb1909c5dc4dca288f4fe818561b5f3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 27 Jun 2008 13:41:14 +0200 Subject: sched: revert revert of: fair-group: SMP-nice for group scheduling Try again.. Initial commit: 18d95a2832c1392a2d63227a7a6d433cb9f2037e Revert: 6363ca57c76b7b83639ca8c83fc285fa26a7880e Signed-off-by: Peter Zijlstra Cc: Srivatsa Vaddagiri Cc: Mike Galbraith Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index eaf821072dbd..97a58b622ee1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -765,6 +765,7 @@ struct sched_domain { struct sched_domain *child; /* bottom domain must be null terminated */ struct sched_group *groups; /* the balancing groups of the domain */ cpumask_t span; /* span of all CPUs in this domain */ + int first_cpu; /* cache of the first cpu in this domain */ unsigned long min_interval; /* Minimum balance interval ms */ unsigned long max_interval; /* Maximum balance interval ms */ unsigned int busy_factor; /* less balancing by factor if busy */ -- cgit v1.2.2 From b6a86c746f5b708012809958462234d19e9c8177 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 27 Jun 2008 13:41:18 +0200 Subject: sched: fix sched_domain aggregation Keeping the aggregate on the first cpu of the sched domain has two problems: - it could collide between different sched domains on different cpus - it could slow things down because of the remote accesses Signed-off-by: Peter Zijlstra Cc: Srivatsa Vaddagiri Cc: Mike Galbraith Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 97a58b622ee1..eaf821072dbd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -765,7 +765,6 @@ struct sched_domain { struct sched_domain *child; /* bottom domain must be null terminated */ struct sched_group *groups; /* the balancing groups of the domain */ cpumask_t span; /* span of all CPUs in this domain */ - int first_cpu; /* cache of the first cpu in this domain */ unsigned long min_interval; /* Minimum balance interval ms */ unsigned long max_interval; /* Maximum balance interval ms */ unsigned int busy_factor; /* less balancing by factor if busy */ -- cgit v1.2.2 From 2398f2c6d34b43025f274fc42eaca34d23ec2320 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 27 Jun 2008 13:41:35 +0200 Subject: sched: update shares on wakeup We found that the affine wakeup code needs rather accurate load figures to be effective. The trouble is that updating the load figures is fairly expensive with group scheduling. Therefore ratelimit the updating. Signed-off-by: Peter Zijlstra Cc: Srivatsa Vaddagiri Cc: Mike Galbraith Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index eaf821072dbd..835b6c6fcc56 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -783,6 +783,8 @@ struct sched_domain { unsigned int balance_interval; /* initialise to 1. units in ms. */ unsigned int nr_balance_failed; /* initialise to 0 */ + u64 last_update; + #ifdef CONFIG_SCHEDSTATS /* load_balance() stats */ unsigned int lb_count[CPU_MAX_IDLE_TYPES]; @@ -1605,6 +1607,7 @@ extern unsigned int sysctl_sched_child_runs_first; extern unsigned int sysctl_sched_features; extern unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_nr_migrate; +extern unsigned int sysctl_sched_shares_ratelimit; int sched_nr_latency_handler(struct ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, -- cgit v1.2.2 From 9465efc9e96135a2cec8154c0c766fa59984a298 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 27 Jun 2008 11:05:24 +0200 Subject: Remove BKL from remote_llseek v2 - Replace remote_llseek with generic_file_llseek_unlocked (to force compilation failures in all users) - Change all users to either use generic_file_llseek_unlocked directly or take the BKL around. I changed the file systems who don't use the BKL for anything (CIFS, GFS) to call it directly. NCPFS and SMBFS and NFS take the BKL, but explicitely in their own source now. I moved them all over in a single patch to avoid unbisectable sections. Open problem: 32bit kernels can corrupt fpos because its modification is not atomic, but they can do that anyways because there's other paths who modify it without BKL. Do we need a special lock for the pos/f_version = 0 checks? Trond says the NFS BKL is likely not needed, but keep it for now until his full audit. v2: Use generic_file_llseek_unlocked instead of remote_llseek_unlocked and factor duplicated code (suggested by hch) Cc: Trond.Myklebust@netapp.com Cc: swhiteho@redhat.com Cc: sfrench@samba.org Cc: vandrove@vc.cvut.cz Signed-off-by: Andi Kleen Signed-off-by: Andi Kleen Signed-off-by: Jonathan Corbet --- include/linux/fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index f413085f748e..b158e5161bca 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1871,7 +1871,8 @@ extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); extern loff_t no_llseek(struct file *file, loff_t offset, int origin); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); -extern loff_t remote_llseek(struct file *file, loff_t offset, int origin); +extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset, + int origin); extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); -- cgit v1.2.2 From 02c62304e6af60f1963695c6bc1bbffe619aa585 Mon Sep 17 00:00:00 2001 From: "Alan D. Brunelle" Date: Wed, 11 Jun 2008 09:12:52 +0200 Subject: Added in user-injected messages into blk traces This allows a user to annotate the blk trace stream: writing a suitable message to {/sys/kernel/debug}/block//msg will have it propagated into the trace stream. Signed-off-by: Alan D. Brunelle Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index e3ef903aae88..d084b8d227a5 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -129,6 +129,7 @@ struct blk_trace { u32 dev; struct dentry *dir; struct dentry *dropped_file; + struct dentry *msg_file; atomic_t dropped; }; -- cgit v1.2.2 From 244b4d56f85bcd11b21ab0b94845a3dabeed5c10 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 12 Jun 2008 20:12:36 +0200 Subject: block: kill request_queue_t Everything was moved to struct request_queue a few kernel revisions ago, maintaining the deprecated typedef to avoid breaking things. Now the time has come to get rid of that typedef. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d2a1b71e93c3..6a3da6717135 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -23,7 +23,6 @@ struct scsi_ioctl_command; struct request_queue; -typedef struct request_queue request_queue_t __deprecated; struct elevator_queue; typedef struct elevator_queue elevator_t; struct request_pm_state; -- cgit v1.2.2 From 51d654e1d885607a6edd02b337105fa5c28b6d33 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 17 Jun 2008 18:59:56 +0200 Subject: block: Globalize bio_set and bio_vec_slab Move struct bio_set and biovec_slab definitions to bio.h so they can be used outside of bio.c. Signed-off-by: Martin K. Petersen Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- include/linux/bio.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 61c15eaf3fb3..49dfb3cb7460 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -333,6 +333,35 @@ extern struct bio *bio_copy_user_iov(struct request_queue *, struct sg_iovec *, int, int); extern int bio_uncopy_user(struct bio *); void zero_fill_bio(struct bio *bio); +extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *); + +/* + * bio_set is used to allow other portions of the IO system to + * allocate their own private memory pools for bio and iovec structures. + * These memory pools in turn all allocate from the bio_slab + * and the bvec_slabs[]. + */ +#define BIO_POOL_SIZE 2 +#define BIOVEC_NR_POOLS 6 + +struct bio_set { + mempool_t *bio_pool; + mempool_t *bvec_pools[BIOVEC_NR_POOLS]; +}; + +struct biovec_slab { + int nr_vecs; + char *name; + struct kmem_cache *slab; +}; + +extern struct bio_set *fs_bio_set; + +/* + * a small number of entries is fine, not going to be performance critical. + * basically we just need to survive + */ +#define BIO_SPLIT_ENTRIES 2 #ifdef CONFIG_HIGHMEM /* -- cgit v1.2.2 From 7ba1ba12eeef0aa7113beb16410ef8b7c748e18b Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Mon, 30 Jun 2008 20:04:41 +0200 Subject: block: Block layer data integrity support Some block devices support verifying the integrity of requests by way of checksums or other protection information that is submitted along with the I/O. This patch implements support for generating and verifying integrity metadata, as well as correctly merging, splitting and cloning bios and requests that have this extra information attached. See Documentation/block/data-integrity.txt for more information. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/bio.h | 94 +++++++++++++++++++++++++++++++++++++++++-- include/linux/blkdev.h | 105 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/genhd.h | 3 ++ 3 files changed, 198 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 49dfb3cb7460..6bfc3e8d9d89 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -64,6 +64,7 @@ struct bio_vec { struct bio_set; struct bio; +struct bio_integrity_payload; typedef void (bio_end_io_t) (struct bio *, int); typedef void (bio_destructor_t) (struct bio *); @@ -112,6 +113,9 @@ struct bio { atomic_t bi_cnt; /* pin count */ void *bi_private; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + struct bio_integrity_payload *bi_integrity; /* data integrity */ +#endif bio_destructor_t *bi_destructor; /* destructor */ }; @@ -271,6 +275,29 @@ static inline void *bio_data(struct bio *bio) */ #define bio_get(bio) atomic_inc(&(bio)->bi_cnt) +#if defined(CONFIG_BLK_DEV_INTEGRITY) +/* + * bio integrity payload + */ +struct bio_integrity_payload { + struct bio *bip_bio; /* parent bio */ + struct bio_vec *bip_vec; /* integrity data vector */ + + sector_t bip_sector; /* virtual start sector */ + + void *bip_buf; /* generated integrity data */ + bio_end_io_t *bip_end_io; /* saved I/O completion fn */ + + int bip_error; /* saved I/O error */ + unsigned int bip_size; + + unsigned short bip_pool; /* pool the ivec came from */ + unsigned short bip_vcnt; /* # of integrity bio_vecs */ + unsigned short bip_idx; /* current bip_vec index */ + + struct work_struct bip_work; /* I/O completion */ +}; +#endif /* CONFIG_BLK_DEV_INTEGRITY */ /* * A bio_pair is used when we need to split a bio. @@ -283,10 +310,14 @@ static inline void *bio_data(struct bio *bio) * in bio2.bi_private */ struct bio_pair { - struct bio bio1, bio2; - struct bio_vec bv1, bv2; - atomic_t cnt; - int error; + struct bio bio1, bio2; + struct bio_vec bv1, bv2; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + struct bio_integrity_payload bip1, bip2; + struct bio_vec iv1, iv2; +#endif + atomic_t cnt; + int error; }; extern struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors); @@ -334,6 +365,7 @@ extern struct bio *bio_copy_user_iov(struct request_queue *, struct sg_iovec *, extern int bio_uncopy_user(struct bio *); void zero_fill_bio(struct bio *bio); extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *); +extern unsigned int bvec_nr_vecs(unsigned short idx); /* * bio_set is used to allow other portions of the IO system to @@ -346,6 +378,9 @@ extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set struct bio_set { mempool_t *bio_pool; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + mempool_t *bio_integrity_pool; +#endif mempool_t *bvec_pools[BIOVEC_NR_POOLS]; }; @@ -410,5 +445,56 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx, __bio_kmap_irq((bio), (bio)->bi_idx, (flags)) #define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags) +#if defined(CONFIG_BLK_DEV_INTEGRITY) + +#define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)])) +#define bip_vec(bip) bip_vec_idx(bip, 0) + +#define __bip_for_each_vec(bvl, bip, i, start_idx) \ + for (bvl = bip_vec_idx((bip), (start_idx)), i = (start_idx); \ + i < (bip)->bip_vcnt; \ + bvl++, i++) + +#define bip_for_each_vec(bvl, bip, i) \ + __bip_for_each_vec(bvl, bip, i, (bip)->bip_idx) + +#define bio_integrity(bio) ((bio)->bi_integrity ? 1 : 0) + +extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *); +extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); +extern void bio_integrity_free(struct bio *, struct bio_set *); +extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); +extern int bio_integrity_enabled(struct bio *bio); +extern int bio_integrity_set_tag(struct bio *, void *, unsigned int); +extern int bio_integrity_get_tag(struct bio *, void *, unsigned int); +extern int bio_integrity_prep(struct bio *); +extern void bio_integrity_endio(struct bio *, int); +extern void bio_integrity_advance(struct bio *, unsigned int); +extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int); +extern void bio_integrity_split(struct bio *, struct bio_pair *, int); +extern int bio_integrity_clone(struct bio *, struct bio *, struct bio_set *); +extern int bioset_integrity_create(struct bio_set *, int); +extern void bioset_integrity_free(struct bio_set *); +extern void bio_integrity_init_slab(void); + +#else /* CONFIG_BLK_DEV_INTEGRITY */ + +#define bio_integrity(a) (0) +#define bioset_integrity_create(a, b) (0) +#define bio_integrity_prep(a) (0) +#define bio_integrity_enabled(a) (0) +#define bio_integrity_clone(a, b, c) (0) +#define bioset_integrity_free(a) do { } while (0) +#define bio_integrity_free(a, b) do { } while (0) +#define bio_integrity_endio(a, b) do { } while (0) +#define bio_integrity_advance(a, b) do { } while (0) +#define bio_integrity_trim(a, b, c) do { } while (0) +#define bio_integrity_split(a, b, c) do { } while (0) +#define bio_integrity_set_tag(a, b, c) do { } while (0) +#define bio_integrity_get_tag(a, b, c) do { } while (0) +#define bio_integrity_init_slab(a) do { } while (0) + +#endif /* CONFIG_BLK_DEV_INTEGRITY */ + #endif /* CONFIG_BLOCK */ #endif /* __LINUX_BIO_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6a3da6717135..4a9ed45270ff 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -112,6 +112,7 @@ enum rq_flag_bits { __REQ_ALLOCED, /* request came from our alloc pool */ __REQ_RW_META, /* metadata io request */ __REQ_COPY_USER, /* contains copies of user pages */ + __REQ_INTEGRITY, /* integrity metadata has been remapped */ __REQ_NR_BITS, /* stops here */ }; @@ -134,6 +135,7 @@ enum rq_flag_bits { #define REQ_ALLOCED (1 << __REQ_ALLOCED) #define REQ_RW_META (1 << __REQ_RW_META) #define REQ_COPY_USER (1 << __REQ_COPY_USER) +#define REQ_INTEGRITY (1 << __REQ_INTEGRITY) #define BLK_MAX_CDB 16 @@ -865,6 +867,109 @@ void kblockd_flush_work(struct work_struct *work); MODULE_ALIAS("block-major-" __stringify(major) "-*") +#if defined(CONFIG_BLK_DEV_INTEGRITY) + +#define INTEGRITY_FLAG_READ 1 /* verify data integrity on read */ +#define INTEGRITY_FLAG_WRITE 2 /* generate data integrity on write */ + +struct blk_integrity_exchg { + void *prot_buf; + void *data_buf; + sector_t sector; + unsigned int data_size; + unsigned short sector_size; + const char *disk_name; +}; + +typedef void (integrity_gen_fn) (struct blk_integrity_exchg *); +typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *); +typedef void (integrity_set_tag_fn) (void *, void *, unsigned int); +typedef void (integrity_get_tag_fn) (void *, void *, unsigned int); + +struct blk_integrity { + integrity_gen_fn *generate_fn; + integrity_vrfy_fn *verify_fn; + integrity_set_tag_fn *set_tag_fn; + integrity_get_tag_fn *get_tag_fn; + + unsigned short flags; + unsigned short tuple_size; + unsigned short sector_size; + unsigned short tag_size; + + const char *name; + + struct kobject kobj; +}; + +extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); +extern void blk_integrity_unregister(struct gendisk *); +extern int blk_integrity_compare(struct block_device *, struct block_device *); +extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); +extern int blk_rq_count_integrity_sg(struct request *); + +static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi) +{ + if (bi) + return bi->tuple_size; + + return 0; +} + +static inline struct blk_integrity *bdev_get_integrity(struct block_device *bdev) +{ + return bdev->bd_disk->integrity; +} + +static inline unsigned int bdev_get_tag_size(struct block_device *bdev) +{ + struct blk_integrity *bi = bdev_get_integrity(bdev); + + if (bi) + return bi->tag_size; + + return 0; +} + +static inline int bdev_integrity_enabled(struct block_device *bdev, int rw) +{ + struct blk_integrity *bi = bdev_get_integrity(bdev); + + if (bi == NULL) + return 0; + + if (rw == READ && bi->verify_fn != NULL && + test_bit(INTEGRITY_FLAG_READ, &bi->flags)) + return 1; + + if (rw == WRITE && bi->generate_fn != NULL && + test_bit(INTEGRITY_FLAG_WRITE, &bi->flags)) + return 1; + + return 0; +} + +static inline int blk_integrity_rq(struct request *rq) +{ + BUG_ON(rq->bio == NULL); + + return bio_integrity(rq->bio); +} + +#else /* CONFIG_BLK_DEV_INTEGRITY */ + +#define blk_integrity_rq(rq) (0) +#define blk_rq_count_integrity_sg(a) (0) +#define blk_rq_map_integrity_sg(a, b) (0) +#define bdev_get_integrity(a) (0) +#define bdev_get_tag_size(a) (0) +#define blk_integrity_compare(a, b) (0) +#define blk_integrity_register(a, b) (0) +#define blk_integrity_unregister(a) do { } while (0); + +#endif /* CONFIG_BLK_DEV_INTEGRITY */ + + #else /* CONFIG_BLOCK */ /* * stubs for when the block layer is configured out diff --git a/include/linux/genhd.h b/include/linux/genhd.h index ae7aec3cabee..524ec96f5a23 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -141,6 +141,9 @@ struct gendisk { struct disk_stats dkstats; #endif struct work_struct async_notify; +#ifdef CONFIG_BLK_DEV_INTEGRITY + struct blk_integrity *integrity; +#endif }; /* -- cgit v1.2.2 From da9cbc87395308a21465bd25441297bbba0477e1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 30 Jun 2008 20:42:08 +0200 Subject: block: blkdev.h cleanup, move iocontext stuff to iocontext.h Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 17 ----------------- include/linux/iocontext.h | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4a9ed45270ff..443df75d2cde 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -33,12 +33,6 @@ struct sg_io_hdr; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ -int put_io_context(struct io_context *ioc); -void exit_io_context(void); -struct io_context *get_io_context(gfp_t gfp_flags, int node); -struct io_context *alloc_io_context(gfp_t gfp_flags, int node); -void copy_io_context(struct io_context **pdst, struct io_context **psrc); - struct request; typedef void (rq_end_io_fn)(struct request *, int); @@ -981,17 +975,6 @@ static inline long nr_blockdev_pages(void) return 0; } -static inline void exit_io_context(void) -{ -} - -struct io_context; -static inline int put_io_context(struct io_context *ioc) -{ - return 1; -} - - #endif /* CONFIG_BLOCK */ #endif diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 2b7a1187cb29..08b987bccf89 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -99,4 +99,22 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc) return NULL; } +#ifdef CONFIG_BLOCK +int put_io_context(struct io_context *ioc); +void exit_io_context(void); +struct io_context *get_io_context(gfp_t gfp_flags, int node); +struct io_context *alloc_io_context(gfp_t gfp_flags, int node); +void copy_io_context(struct io_context **pdst, struct io_context **psrc); +#else +static inline void exit_io_context(void) +{ +} + +struct io_context; +static inline int put_io_context(struct io_context *ioc) +{ + return 1; +} +#endif + #endif -- cgit v1.2.2 From 6e2401ad6f33de15ff00f78b88159f00a14f3b35 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 18 Jun 2008 10:15:02 +0200 Subject: block: integrity cleanups - No need to check for NULL bio, we'll get an immediate oops anyway. - Make bio_integrity() a proper function. Signed-off-by: Jens Axboe --- include/linux/bio.h | 9 ++++++++- include/linux/blkdev.h | 4 ---- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 6bfc3e8d9d89..0933a14e6414 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -458,7 +458,14 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx, #define bip_for_each_vec(bvl, bip, i) \ __bip_for_each_vec(bvl, bip, i, (bip)->bip_idx) -#define bio_integrity(bio) ((bio)->bi_integrity ? 1 : 0) +static inline int bio_integrity(struct bio *bio) +{ +#if defined(CONFIG_BLK_DEV_INTEGRITY) + return bio->bi_integrity != NULL; +#else + return 0; +#endif +} extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *); extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 443df75d2cde..d3ae9ad97213 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -860,7 +860,6 @@ void kblockd_flush_work(struct work_struct *work); #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ MODULE_ALIAS("block-major-" __stringify(major) "-*") - #if defined(CONFIG_BLK_DEV_INTEGRITY) #define INTEGRITY_FLAG_READ 1 /* verify data integrity on read */ @@ -945,8 +944,6 @@ static inline int bdev_integrity_enabled(struct block_device *bdev, int rw) static inline int blk_integrity_rq(struct request *rq) { - BUG_ON(rq->bio == NULL); - return bio_integrity(rq->bio); } @@ -963,7 +960,6 @@ static inline int blk_integrity_rq(struct request *rq) #endif /* CONFIG_BLK_DEV_INTEGRITY */ - #else /* CONFIG_BLOCK */ /* * stubs for when the block layer is configured out -- cgit v1.2.2 From 0b07de85a76e1346e675f0e98437378932473df7 Mon Sep 17 00:00:00 2001 From: Adel Gadllah Date: Thu, 26 Jun 2008 13:48:27 +0200 Subject: allow userspace to modify scsi command filter on per device basis This patch exports the per-gendisk command filter to user space through sysfs, so it can be changed by the system administrator. All users of the old cmd filter have been converted to use the new one. Original patch from Peter Jones. Signed-off-by: Adel Gadllah Signed-off-by: Peter Jones Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 10 +++++++++- include/linux/genhd.h | 9 +++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d3ae9ad97213..a842b776d099 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -671,7 +671,6 @@ extern int blk_execute_rq(struct request_queue *, struct gendisk *, struct request *, int); extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, struct request *, int, rq_end_io_fn *); -extern int blk_verify_command(unsigned char *, int); extern void blk_unplug(struct request_queue *q); static inline struct request_queue *bdev_get_queue(struct block_device *bdev) @@ -797,6 +796,15 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, extern int blkdev_issue_flush(struct block_device *, sector_t *); +/* +* command filter functions +*/ +extern int blk_verify_command(struct file *file, unsigned char *cmd); +extern int blk_cmd_filter_verify_command(struct blk_scsi_cmd_filter *filter, + unsigned char *cmd, mode_t *f_mode); +extern int blk_register_filter(struct gendisk *disk); +extern void blk_unregister_filter(struct gendisk *disk); + #define MAX_PHYS_SEGMENTS 128 #define MAX_HW_SEGMENTS 128 #define SAFE_MAX_SECTORS 255 diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 524ec96f5a23..e8787417f65a 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -110,6 +110,14 @@ struct hd_struct { #define GENHD_FL_SUPPRESS_PARTITION_INFO 32 #define GENHD_FL_FAIL 64 +#define BLK_SCSI_MAX_CMDS (256) +#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) + +struct blk_scsi_cmd_filter { + unsigned long read_ok[BLK_SCSI_CMD_PER_LONG]; + unsigned long write_ok[BLK_SCSI_CMD_PER_LONG]; + struct kobject kobj; +}; struct gendisk { int major; /* major number of driver */ @@ -120,6 +128,7 @@ struct gendisk { struct hd_struct **part; /* [indexed by minor] */ struct block_device_operations *fops; struct request_queue *queue; + struct blk_scsi_cmd_filter cmd_filter; void *private_data; sector_t capacity; -- cgit v1.2.2 From b24498d477a14680fc3bb3ad884fa9fa76a2d237 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 27 Jun 2008 09:12:09 +0200 Subject: block: integrity flags can't use bit ops on unsigned short Just use normal open coded bit operations instead, they need not be atomic. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a842b776d099..7ab8acad5b6e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -870,8 +870,8 @@ void kblockd_flush_work(struct work_struct *work); #if defined(CONFIG_BLK_DEV_INTEGRITY) -#define INTEGRITY_FLAG_READ 1 /* verify data integrity on read */ -#define INTEGRITY_FLAG_WRITE 2 /* generate data integrity on write */ +#define INTEGRITY_FLAG_READ 2 /* verify data integrity on read */ +#define INTEGRITY_FLAG_WRITE 4 /* generate data integrity on write */ struct blk_integrity_exchg { void *prot_buf; @@ -940,11 +940,11 @@ static inline int bdev_integrity_enabled(struct block_device *bdev, int rw) return 0; if (rw == READ && bi->verify_fn != NULL && - test_bit(INTEGRITY_FLAG_READ, &bi->flags)) + (bi->flags & INTEGRITY_FLAG_READ)) return 1; if (rw == WRITE && bi->generate_fn != NULL && - test_bit(INTEGRITY_FLAG_WRITE, &bi->flags)) + (bi->flags & INTEGRITY_FLAG_WRITE)) return 1; return 0; -- cgit v1.2.2 From cc371e66e340f35eed8dc4651c7c18e754c7fb26 Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Thu, 3 Jul 2008 09:53:43 +0200 Subject: Add bvec_merge_data to handle stacked devices and ->merge_bvec() When devices are stacked, one device's merge_bvec_fn may need to perform the mapping and then call one or more functions for its underlying devices. The following bio fields are used: bio->bi_sector bio->bi_bdev bio->bi_size bio->bi_rw using bio_data_dir() This patch creates a new struct bvec_merge_data holding a copy of those fields to avoid having to change them directly in the struct bio when going down the stack only to have to change them back again on the way back up. (And then when the bio gets mapped for real, the whole exercise gets repeated, but that's a problem for another day...) Signed-off-by: Alasdair G Kergon Cc: Neil Brown Cc: Milan Broz Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7ab8acad5b6e..ff9d0bdf2a16 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -254,7 +254,14 @@ typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unplug_fn) (struct request_queue *); struct bio_vec; -typedef int (merge_bvec_fn) (struct request_queue *, struct bio *, struct bio_vec *); +struct bvec_merge_data { + struct block_device *bi_bdev; + sector_t bi_sector; + unsigned bi_size; + unsigned long bi_rw; +}; +typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *, + struct bio_vec *); typedef void (prepare_flush_fn) (struct request_queue *, struct request *); typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); -- cgit v1.2.2 From e48ec69005f02b70b7ecfde1bc39a599086d16ef Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 3 Jul 2008 13:18:54 +0200 Subject: block: extend queue_flag bitops Add test_and_clear and test_and_set. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ff9d0bdf2a16..e04c4ac8a7cf 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -428,6 +428,32 @@ static inline void queue_flag_set_unlocked(unsigned int flag, __set_bit(flag, &q->queue_flags); } +static inline int queue_flag_test_and_clear(unsigned int flag, + struct request_queue *q) +{ + WARN_ON_ONCE(!queue_is_locked(q)); + + if (test_bit(flag, &q->queue_flags)) { + __clear_bit(flag, &q->queue_flags); + return 1; + } + + return 0; +} + +static inline int queue_flag_test_and_set(unsigned int flag, + struct request_queue *q) +{ + WARN_ON_ONCE(!queue_is_locked(q)); + + if (!test_bit(flag, &q->queue_flags)) { + __set_bit(flag, &q->queue_flags); + return 0; + } + + return 1; +} + static inline void queue_flag_set(unsigned int flag, struct request_queue *q) { WARN_ON_ONCE(!queue_is_locked(q)); -- cgit v1.2.2 From 42796d37da6ef4fd851dc6d5d0387baf7e2b0c3c Mon Sep 17 00:00:00 2001 From: eric miao Date: Mon, 14 Apr 2008 09:35:08 +0100 Subject: [ARM] pxa: add generic PWM backlight driver Patch mostly from Eric Miao, with minor edits by rmk to convert Eric's driver to a generic PWM-based backlight driver. Signed-off-by: eric miao Signed-off-by: Russell King --- include/linux/pwm_backlight.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 include/linux/pwm_backlight.h (limited to 'include/linux') diff --git a/include/linux/pwm_backlight.h b/include/linux/pwm_backlight.h new file mode 100644 index 000000000000..aeeffedbe822 --- /dev/null +++ b/include/linux/pwm_backlight.h @@ -0,0 +1,14 @@ +/* + * Generic PWM backlight driver data - see drivers/video/backlight/pwm_bl.c + */ +#ifndef __LINUX_PWM_BACKLIGHT_H +#define __LINUX_PWM_BACKLIGHT_H + +struct platform_pwm_backlight_data { + int pwm_id; + unsigned int max_brightness; + unsigned int dft_brightness; + unsigned int pwm_period_ns; +}; + +#endif -- cgit v1.2.2 From 3b73125af69f93972625f4b655675f42ca4274eb Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Thu, 22 May 2008 14:18:40 +0100 Subject: [ARM] 5044/1: pwm_bl: add init/notify/exit callbacks This allows platform code to manipulate GPIOs and brightness level as needed. Signed-off-by: Philipp Zabel Signed-off-by: Russell King --- include/linux/pwm_backlight.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pwm_backlight.h b/include/linux/pwm_backlight.h index aeeffedbe822..7a9754c96775 100644 --- a/include/linux/pwm_backlight.h +++ b/include/linux/pwm_backlight.h @@ -9,6 +9,9 @@ struct platform_pwm_backlight_data { unsigned int max_brightness; unsigned int dft_brightness; unsigned int pwm_period_ns; + int (*init)(struct device *dev); + int (*notify)(int brightness); + void (*exit)(struct device *dev); }; #endif -- cgit v1.2.2 From 27f8221af406e43b529a5425bc99c9b1e9bdf521 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 4 Jul 2008 09:30:03 +0200 Subject: block: add blk_queue_update_dma_pad This adds blk_queue_update_dma_pad to prevent LLDs from overwriting the dma pad mask wrongly (we added blk_queue_update_dma_alignment due to the same reason). This also converts libata to use blk_queue_update_dma_pad instead of blk_queue_dma_pad. Signed-off-by: FUJITA Tomonori Cc: Tejun Heo Cc: Bartlomiej Zolnierkiewicz Cc: Thomas Bogendoerfer Cc: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e04c4ac8a7cf..1ffd8bfdc4c9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -776,6 +776,7 @@ extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_hardsect_size(struct request_queue *, unsigned short); extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); extern void blk_queue_dma_pad(struct request_queue *, unsigned int); +extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); extern int blk_queue_dma_drain(struct request_queue *q, dma_drain_needed_fn *dma_drain_needed, void *buf, unsigned int size); -- cgit v1.2.2 From ba8dd03ac09f51a69c154b8cb508b701d713a2cd Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 4 Jul 2008 11:26:40 +0200 Subject: generic-ipi: fix s390 build bug forgot to remove #include from linux/smp.h while fixing the original s390 build bug. Patch below fixes this build bug caused by header inclusion dependencies: CC kernel/timer.o In file included from include/linux/spinlock.h:87, from include/linux/smp.h:11, from include/linux/kernel_stat.h:4, from kernel/timer.c:22: include/asm/spinlock.h: In function '__raw_spin_lock': include/asm/spinlock.h:69: error: implicit declaration of function 'smp_processor_id' Signed-off-by: Heiko Carstens Signed-off-by: Ingo Molnar --- include/linux/smp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 55261101d09a..48262f86c969 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -8,7 +8,6 @@ #include #include -#include #include extern void cpu_idle(void); -- cgit v1.2.2 From d2dbf343329dc777d77488743465f7be4245971d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 13 Jun 2008 02:00:56 -0700 Subject: x86: clean up reserve_bootmem_generic() and port it to 32-bit 1. add reserve_bootmem_generic for 32bit 2. change len to unsigned long 3. make early_res_to_bootmem to use it Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/bootmem.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 686895bacd9d..a1d9b79078ea 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -84,6 +84,8 @@ extern int reserve_bootmem(unsigned long addr, unsigned long size, int flags); __alloc_bootmem_low(x, PAGE_SIZE, 0) #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ +extern int reserve_bootmem_generic(unsigned long addr, unsigned long size, + int flags); extern unsigned long free_all_bootmem(void); extern unsigned long free_all_bootmem_node(pg_data_t *pgdat); extern void *__alloc_bootmem_node(pg_data_t *pgdat, -- cgit v1.2.2 From cc1050bafebfb1d7935331282e948b5016318192 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 13 Jun 2008 19:08:52 -0700 Subject: x86: replace shrink_active_range() with remove_active_range() in case we have kva before ramdisk on a node, we still need to use those ranges. v2: reserve_early kva ram area, in case there are holes in highmem, to avoid those area could be treat as free high pages. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/mm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ce8e397a61f6..034a3156d2f0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -998,7 +998,8 @@ extern void free_area_init_node(int nid, pg_data_t *pgdat, extern void free_area_init_nodes(unsigned long *max_zone_pfn); extern void add_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); -extern void shrink_active_range(unsigned int nid, unsigned long new_end_pfn); +extern void remove_active_range(unsigned int nid, unsigned long start_pfn, + unsigned long end_pfn); extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); extern void remove_all_active_ranges(void); -- cgit v1.2.2 From b5bc6c0e55000dab86b73f838f5ad02908b23755 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 14 Jun 2008 18:32:52 -0700 Subject: x86, mm: use add_highpages_with_active_regions() for high pages init v2 use early_node_map to init high pages, so we can remove page_is_ram() and page_is_reserved_early() in the big loop with add_one_highpage also remove page_is_reserved_early(), it is not needed anymore. v2: fix the build of other platforms Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 034a3156d2f0..e4de460907c1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1011,6 +1011,8 @@ extern unsigned long find_min_pfn_with_active_regions(void); extern unsigned long find_max_pfn_with_active_regions(void); extern void free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); +typedef void (*work_fn_t)(unsigned long, unsigned long, void *); +extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data); extern void sparse_memory_present_with_active_regions(int nid); #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID extern int early_pfn_to_nid(unsigned long pfn); -- cgit v1.2.2 From 3461b0af025251bbc6b3d56c821c6ac2de6f7209 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 12 May 2008 21:21:13 +0200 Subject: x86: remove static boot_cpu_pda array v2 * Remove the boot_cpu_pda array and pointer table from the data section. Allocate the pointer table and array during init. do_boot_cpu() will reallocate the pda in node local memory and if the cpu is being brought up before the bootmem array is released (after_bootmem = 0), then it will free the initial pda. This will happen for all cpus present at system startup. This removes 512k + 32k bytes from the data section. For inclusion into sched-devel/latest tree. Based on: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git + sched-devel/latest .../mingo/linux-2.6-sched-devel.git Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 586a943cab01..0ea48a5af823 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1024,6 +1024,7 @@ extern void mem_init(void); extern void show_mem(void); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); +extern int after_bootmem; #ifdef CONFIG_NUMA extern void setup_per_cpu_pageset(void); -- cgit v1.2.2 From a7bf0bd5e6af7fe69342dabf2a3b721f0163469a Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 28 May 2008 15:02:14 +0100 Subject: build: add __page_aligned_data and __page_aligned_bss Making a variable page-aligned by using __attribute__((section(".data.page_aligned"))) is fragile because if sizeof(variable) is not also a multiple of page size, it leaves variables in the remainder of the section unaligned. This patch introduces two new qualifiers, __page_aligned_data and __page_aligned_bss to set the section *and* the alignment of variables. This makes page-aligned variables more robust because the linker will make sure they're aligned properly. Unfortunately it requires *all* page-aligned data to use these macros... Signed-off-by: Ingo Molnar --- include/linux/linkage.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 2119610b24f8..9fd1f859021b 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -1,6 +1,7 @@ #ifndef _LINUX_LINKAGE_H #define _LINUX_LINKAGE_H +#include #include #ifdef __cplusplus @@ -17,6 +18,9 @@ # define asmregparm #endif +#define __page_aligned_data __section(.data.page_aligned) __aligned(PAGE_SIZE) +#define __page_aligned_bss __section(.bss.page_aligned) __aligned(PAGE_SIZE) + /* * This is used by architectures to keep arguments on the stack * untouched by the compiler by keeping them live until the end. -- cgit v1.2.2 From d52d53b8a5b258bfaab9223a5e7284fcfdd48577 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 16 Jun 2008 20:10:55 -0700 Subject: RFC x86: try to remove arch_get_ram_range want to remove arch_get_ram_range, and use early_node_map instead. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3d647b24041f..cf1cd3a2ed78 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1011,7 +1011,7 @@ extern unsigned long find_min_pfn_with_active_regions(void); extern unsigned long find_max_pfn_with_active_regions(void); extern void free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); -typedef void (*work_fn_t)(unsigned long, unsigned long, void *); +typedef int (*work_fn_t)(unsigned long, unsigned long, void *); extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data); extern void sparse_memory_present_with_active_regions(int nid); #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID -- cgit v1.2.2 From 3c999f142665265afd0fe9190204dd051f17e505 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 20 Jun 2008 16:11:20 -0700 Subject: x86: check command line when CONFIG_X86_MPPARSE is not set, v2 if acpi=off, acpi=noirq and pci=noacpi, we need to disable apic. Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: "Maciej W. Rozycki" Signed-off-by: Ingo Molnar --- include/linux/acpi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 41f7ce7edd7a..0601075d09a1 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -82,6 +82,7 @@ char * __acpi_map_table (unsigned long phys_addr, unsigned long size); int early_acpi_boot_init(void); int acpi_boot_init (void); int acpi_boot_table_init (void); +int acpi_mps_check (void); int acpi_numa_init (void); int acpi_table_init (void); @@ -250,6 +251,11 @@ static inline int acpi_boot_table_init(void) return 0; } +static inline int acpi_mps_check(void) +{ + return 0; +} + static inline int acpi_check_resource_conflict(struct resource *res) { return 0; -- cgit v1.2.2 From 69ac9cd629ca96e59f34eb4ccd12d00b2c8276a7 Mon Sep 17 00:00:00 2001 From: Bernhard Walle Date: Fri, 27 Jun 2008 13:12:54 +0200 Subject: sysfs: add /sys/firmware/memmap This patch adds /sys/firmware/memmap interface that represents the BIOS (or Firmware) provided memory map. The tree looks like: /sys/firmware/memmap/0/start (hex number) end (hex number) type (string) ... /1/start end type With the following shell snippet one can print the memory map in the same form the kernel prints itself when booting on x86 (the E820 map). --------- 8< -------------------------- #!/bin/sh cd /sys/firmware/memmap for dir in * ; do start=$(cat $dir/start) end=$(cat $dir/end) type=$(cat $dir/type) printf "%016x-%016x (%s)\n" $start $[ $end +1] "$type" done --------- >8 -------------------------- That patch only provides the needed interface: 1. The sysfs interface. 2. The structure and enumeration definition. 3. The function firmware_map_add() and firmware_map_add_early() that should be called from architecture code (E820/EFI, for example) to add the contents to the interface. If the kernel is compiled without CONFIG_FIRMWARE_MEMMAP, the interface does nothing without cluttering the architecture-specific code with #ifdef's. The purpose of the new interface is kexec: While /proc/iomem represents the *used* memory map (e.g. modified via kernel parameters like 'memmap' and 'mem'), the /sys/firmware/memmap tree represents the unmodified memory map provided via the firmware. So kexec can: - use the original memory map for rebooting, - use the /proc/iomem for setting up the ELF core headers for kdump case that should only represent the memory of the system. The patch has been tested on i386 and x86_64. Signed-off-by: Bernhard Walle Acked-by: Greg KH Acked-by: Vivek Goyal Cc: kexec@lists.infradead.org Cc: yhlu.kernel@gmail.com Signed-off-by: Ingo Molnar --- include/linux/firmware-map.h | 74 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 include/linux/firmware-map.h (limited to 'include/linux') diff --git a/include/linux/firmware-map.h b/include/linux/firmware-map.h new file mode 100644 index 000000000000..acbdbcc16051 --- /dev/null +++ b/include/linux/firmware-map.h @@ -0,0 +1,74 @@ +/* + * include/linux/firmware-map.h: + * Copyright (C) 2008 SUSE LINUX Products GmbH + * by Bernhard Walle + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License v2.0 as published by + * the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef _LINUX_FIRMWARE_MAP_H +#define _LINUX_FIRMWARE_MAP_H + +#include +#include + +/* + * provide a dummy interface if CONFIG_FIRMWARE_MEMMAP is disabled + */ +#ifdef CONFIG_FIRMWARE_MEMMAP + +/** + * Adds a firmware mapping entry. This function uses kmalloc() for memory + * allocation. Use firmware_map_add_early() if you want to use the bootmem + * allocator. + * + * That function must be called before late_initcall. + * + * @start: Start of the memory range. + * @end: End of the memory range (inclusive). + * @type: Type of the memory range. + * + * Returns 0 on success, or -ENOMEM if no memory could be allocated. + */ +int firmware_map_add(resource_size_t start, resource_size_t end, + const char *type); + +/** + * Adds a firmware mapping entry. This function uses the bootmem allocator + * for memory allocation. Use firmware_map_add() if you want to use kmalloc(). + * + * That function must be called before late_initcall. + * + * @start: Start of the memory range. + * @end: End of the memory range (inclusive). + * @type: Type of the memory range. + * + * Returns 0 on success, or -ENOMEM if no memory could be allocated. + */ +int firmware_map_add_early(resource_size_t start, resource_size_t end, + const char *type); + +#else /* CONFIG_FIRMWARE_MEMMAP */ + +static inline int firmware_map_add(resource_size_t start, resource_size_t end, + const char *type) +{ + return 0; +} + +static inline int firmware_map_add_early(resource_size_t start, + resource_size_t end, const char *type) +{ + return 0; +} + +#endif /* CONFIG_FIRMWARE_MEMMAP */ + +#endif /* _LINUX_FIRMWARE_MAP_H */ -- cgit v1.2.2 From a861beb1401d65e3f095fee074c13645ab06490e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 8 Jul 2008 19:27:22 +0200 Subject: ide: add __ide_default_irq() inline helper Add __ide_default_irq() inline helper and use it instead of ide_default_irq() in ide-probe.c and ns87415.c (all host drivers except IDE PCI ones always setup hwif->irq so it is enough to check only for I/O bases 0x1f0 and 0x170). This fixes post-2.6.25 regression since ide_default_irq() define could shadow ide_default_irq() inline. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 9918772bf274..eddb6daadf4a 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -189,6 +189,21 @@ static inline void ide_std_init_ports(hw_regs_t *hw, hw->io_ports.ctl_addr = ctl_addr; } +/* for IDE PCI controllers in legacy mode, temporary */ +static inline int __ide_default_irq(unsigned long base) +{ + switch (base) { +#ifdef CONFIG_IA64 + case 0x1f0: return isa_irq_to_vector(14); + case 0x170: return isa_irq_to_vector(15); +#else + case 0x1f0: return 14; + case 0x170: return 15; +#endif + } + return 0; +} + #include #if !defined(MAX_HWIFS) || defined(CONFIG_EMBEDDED) -- cgit v1.2.2 From 004a403c2e954734090a69aedc7f4f822bdcc142 Mon Sep 17 00:00:00 2001 From: Loc Ho Date: Wed, 14 May 2008 20:41:47 +0800 Subject: [CRYPTO] hash: Add asynchronous hash support This patch adds asynchronous hash and digest support. Signed-off-by: Loc Ho Signed-off-by: Herbert Xu --- include/linux/crypto.h | 187 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 183 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 425824bd49f3..b6efe569128d 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -30,15 +30,17 @@ */ #define CRYPTO_ALG_TYPE_MASK 0x0000000f #define CRYPTO_ALG_TYPE_CIPHER 0x00000001 -#define CRYPTO_ALG_TYPE_DIGEST 0x00000002 -#define CRYPTO_ALG_TYPE_HASH 0x00000003 +#define CRYPTO_ALG_TYPE_COMPRESS 0x00000002 +#define CRYPTO_ALG_TYPE_AEAD 0x00000003 #define CRYPTO_ALG_TYPE_BLKCIPHER 0x00000004 #define CRYPTO_ALG_TYPE_ABLKCIPHER 0x00000005 #define CRYPTO_ALG_TYPE_GIVCIPHER 0x00000006 -#define CRYPTO_ALG_TYPE_COMPRESS 0x00000008 -#define CRYPTO_ALG_TYPE_AEAD 0x00000009 +#define CRYPTO_ALG_TYPE_DIGEST 0x00000008 +#define CRYPTO_ALG_TYPE_HASH 0x00000009 +#define CRYPTO_ALG_TYPE_AHASH 0x0000000a #define CRYPTO_ALG_TYPE_HASH_MASK 0x0000000e +#define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000c #define CRYPTO_ALG_TYPE_BLKCIPHER_MASK 0x0000000c #define CRYPTO_ALG_LARVAL 0x00000010 @@ -102,6 +104,7 @@ struct crypto_async_request; struct crypto_aead; struct crypto_blkcipher; struct crypto_hash; +struct crypto_ahash; struct crypto_tfm; struct crypto_type; struct aead_givcrypt_request; @@ -131,6 +134,18 @@ struct ablkcipher_request { void *__ctx[] CRYPTO_MINALIGN_ATTR; }; +struct ahash_request { + struct crypto_async_request base; + + void *info; + + unsigned int nbytes; + struct scatterlist *src; + u8 *result; + + void *__ctx[] CRYPTO_MINALIGN_ATTR; +}; + /** * struct aead_request - AEAD request * @base: Common attributes for async crypto requests @@ -195,6 +210,17 @@ struct ablkcipher_alg { unsigned int ivsize; }; +struct ahash_alg { + int (*init)(struct ahash_request *req); + int (*update)(struct ahash_request *req); + int (*final)(struct ahash_request *req); + int (*digest)(struct ahash_request *req); + int (*setkey)(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen); + + unsigned int digestsize; +}; + struct aead_alg { int (*setkey)(struct crypto_aead *tfm, const u8 *key, unsigned int keylen); @@ -272,6 +298,7 @@ struct compress_alg { #define cra_cipher cra_u.cipher #define cra_digest cra_u.digest #define cra_hash cra_u.hash +#define cra_ahash cra_u.ahash #define cra_compress cra_u.compress struct crypto_alg { @@ -298,6 +325,7 @@ struct crypto_alg { struct cipher_alg cipher; struct digest_alg digest; struct hash_alg hash; + struct ahash_alg ahash; struct compress_alg compress; } cra_u; @@ -383,6 +411,19 @@ struct hash_tfm { unsigned int digestsize; }; +struct ahash_tfm { + int (*init)(struct ahash_request *req); + int (*update)(struct ahash_request *req); + int (*final)(struct ahash_request *req); + int (*digest)(struct ahash_request *req); + int (*setkey)(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen); + + unsigned int digestsize; + struct crypto_ahash *base; + unsigned int reqsize; +}; + struct compress_tfm { int (*cot_compress)(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, @@ -397,6 +438,7 @@ struct compress_tfm { #define crt_blkcipher crt_u.blkcipher #define crt_cipher crt_u.cipher #define crt_hash crt_u.hash +#define crt_ahash crt_u.ahash #define crt_compress crt_u.compress struct crypto_tfm { @@ -409,6 +451,7 @@ struct crypto_tfm { struct blkcipher_tfm blkcipher; struct cipher_tfm cipher; struct hash_tfm hash; + struct ahash_tfm ahash; struct compress_tfm compress; } crt_u; @@ -441,6 +484,10 @@ struct crypto_hash { struct crypto_tfm base; }; +struct crypto_ahash { + struct crypto_tfm base; +}; + enum { CRYPTOA_UNSPEC, CRYPTOA_ALG, @@ -1264,5 +1311,137 @@ static inline int crypto_comp_decompress(struct crypto_comp *tfm, src, slen, dst, dlen); } +static inline struct crypto_ahash *__crypto_ahash_cast(struct crypto_tfm *tfm) +{ + return (struct crypto_ahash *)tfm; +} + +static inline struct crypto_ahash *crypto_alloc_ahash(const char *alg_name, + u32 type, u32 mask) +{ + type &= ~CRYPTO_ALG_TYPE_MASK; + mask &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_AHASH; + mask |= CRYPTO_ALG_TYPE_AHASH_MASK; + + return __crypto_ahash_cast(crypto_alloc_base(alg_name, type, mask)); +} + +static inline struct crypto_tfm *crypto_ahash_tfm(struct crypto_ahash *tfm) +{ + return &tfm->base; +} + +static inline void crypto_free_ahash(struct crypto_ahash *tfm) +{ + crypto_free_tfm(crypto_ahash_tfm(tfm)); +} + +static inline unsigned int crypto_ahash_alignmask( + struct crypto_ahash *tfm) +{ + return crypto_tfm_alg_alignmask(crypto_ahash_tfm(tfm)); +} + +static inline struct ahash_tfm *crypto_ahash_crt(struct crypto_ahash *tfm) +{ + return &crypto_ahash_tfm(tfm)->crt_ahash; +} + +static inline unsigned int crypto_ahash_digestsize(struct crypto_ahash *tfm) +{ + return crypto_ahash_crt(tfm)->digestsize; +} + +static inline u32 crypto_ahash_get_flags(struct crypto_ahash *tfm) +{ + return crypto_tfm_get_flags(crypto_ahash_tfm(tfm)); +} + +static inline void crypto_ahash_set_flags(struct crypto_ahash *tfm, u32 flags) +{ + crypto_tfm_set_flags(crypto_ahash_tfm(tfm), flags); +} + +static inline void crypto_ahash_clear_flags(struct crypto_ahash *tfm, u32 flags) +{ + crypto_tfm_clear_flags(crypto_ahash_tfm(tfm), flags); +} + +static inline struct crypto_ahash *crypto_ahash_reqtfm( + struct ahash_request *req) +{ + return __crypto_ahash_cast(req->base.tfm); +} + +static inline unsigned int crypto_ahash_reqsize(struct crypto_ahash *tfm) +{ + return crypto_ahash_crt(tfm)->reqsize; +} + +static inline int crypto_ahash_setkey(struct crypto_ahash *tfm, + const u8 *key, unsigned int keylen) +{ + struct ahash_tfm *crt = crypto_ahash_crt(tfm); + + return crt->setkey(crt->base, key, keylen); +} + +static inline int crypto_ahash_digest(struct ahash_request *req) +{ + struct ahash_tfm *crt = crypto_ahash_crt(crypto_ahash_reqtfm(req)); + return crt->digest(req); +} + +static inline void ahash_request_set_tfm(struct ahash_request *req, + struct crypto_ahash *tfm) +{ + req->base.tfm = crypto_ahash_tfm(crypto_ahash_crt(tfm)->base); +} + +static inline struct ahash_request *ahash_request_alloc( + struct crypto_ahash *tfm, gfp_t gfp) +{ + struct ahash_request *req; + + req = kmalloc(sizeof(struct ahash_request) + + crypto_ahash_reqsize(tfm), gfp); + + if (likely(req)) + ahash_request_set_tfm(req, tfm); + + return req; +} + +static inline void ahash_request_free(struct ahash_request *req) +{ + kfree(req); +} + +static inline struct ahash_request *ahash_request_cast( + struct crypto_async_request *req) +{ + return container_of(req, struct ahash_request, base); +} + +static inline void ahash_request_set_callback(struct ahash_request *req, + u32 flags, + crypto_completion_t complete, + void *data) +{ + req->base.complete = complete; + req->base.data = data; + req->base.flags = flags; +} + +static inline void ahash_request_set_crypt(struct ahash_request *req, + struct scatterlist *src, u8 *result, + unsigned int nbytes) +{ + req->src = src; + req->nbytes = nbytes; + req->result = result; +} + #endif /* _LINUX_CRYPTO_H */ -- cgit v1.2.2 From 166247f46a9c866e6f7f7d2212be875fb82212a1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 7 Jul 2008 20:54:35 +0800 Subject: crypto: hash - Removed vestigial ahash fields The base field in ahash_tfm appears to have been cut-n-pasted from ablkcipher. It isn't needed here at all. Similarly, the info field in ahash_request also appears to have originated from its cipher counter-part and is vestigial. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index b6efe569128d..68ef293644d3 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -137,8 +137,6 @@ struct ablkcipher_request { struct ahash_request { struct crypto_async_request base; - void *info; - unsigned int nbytes; struct scatterlist *src; u8 *result; @@ -420,7 +418,6 @@ struct ahash_tfm { unsigned int keylen); unsigned int digestsize; - struct crypto_ahash *base; unsigned int reqsize; }; @@ -1384,7 +1381,7 @@ static inline int crypto_ahash_setkey(struct crypto_ahash *tfm, { struct ahash_tfm *crt = crypto_ahash_crt(tfm); - return crt->setkey(crt->base, key, keylen); + return crt->setkey(tfm, key, keylen); } static inline int crypto_ahash_digest(struct ahash_request *req) @@ -1396,7 +1393,7 @@ static inline int crypto_ahash_digest(struct ahash_request *req) static inline void ahash_request_set_tfm(struct ahash_request *req, struct crypto_ahash *tfm) { - req->base.tfm = crypto_ahash_tfm(crypto_ahash_crt(tfm)->base); + req->base.tfm = crypto_ahash_tfm(tfm); } static inline struct ahash_request *ahash_request_alloc( -- cgit v1.2.2 From 18e33e6d5cc0495826f5245777cd267732815e01 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 10 Jul 2008 16:01:22 +0800 Subject: crypto: hash - Move ahash functions into crypto/hash.h All new crypto interfaces should go into individual files as much as possible in order to ensure that crypto.h does not collapse under its own weight. This patch moves the ahash code into crypto/hash.h and crypto/internal/hash.h respectively. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 136 ------------------------------------------------- 1 file changed, 136 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 68ef293644d3..c43dc47fdf75 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -481,10 +481,6 @@ struct crypto_hash { struct crypto_tfm base; }; -struct crypto_ahash { - struct crypto_tfm base; -}; - enum { CRYPTOA_UNSPEC, CRYPTOA_ALG, @@ -1308,137 +1304,5 @@ static inline int crypto_comp_decompress(struct crypto_comp *tfm, src, slen, dst, dlen); } -static inline struct crypto_ahash *__crypto_ahash_cast(struct crypto_tfm *tfm) -{ - return (struct crypto_ahash *)tfm; -} - -static inline struct crypto_ahash *crypto_alloc_ahash(const char *alg_name, - u32 type, u32 mask) -{ - type &= ~CRYPTO_ALG_TYPE_MASK; - mask &= ~CRYPTO_ALG_TYPE_MASK; - type |= CRYPTO_ALG_TYPE_AHASH; - mask |= CRYPTO_ALG_TYPE_AHASH_MASK; - - return __crypto_ahash_cast(crypto_alloc_base(alg_name, type, mask)); -} - -static inline struct crypto_tfm *crypto_ahash_tfm(struct crypto_ahash *tfm) -{ - return &tfm->base; -} - -static inline void crypto_free_ahash(struct crypto_ahash *tfm) -{ - crypto_free_tfm(crypto_ahash_tfm(tfm)); -} - -static inline unsigned int crypto_ahash_alignmask( - struct crypto_ahash *tfm) -{ - return crypto_tfm_alg_alignmask(crypto_ahash_tfm(tfm)); -} - -static inline struct ahash_tfm *crypto_ahash_crt(struct crypto_ahash *tfm) -{ - return &crypto_ahash_tfm(tfm)->crt_ahash; -} - -static inline unsigned int crypto_ahash_digestsize(struct crypto_ahash *tfm) -{ - return crypto_ahash_crt(tfm)->digestsize; -} - -static inline u32 crypto_ahash_get_flags(struct crypto_ahash *tfm) -{ - return crypto_tfm_get_flags(crypto_ahash_tfm(tfm)); -} - -static inline void crypto_ahash_set_flags(struct crypto_ahash *tfm, u32 flags) -{ - crypto_tfm_set_flags(crypto_ahash_tfm(tfm), flags); -} - -static inline void crypto_ahash_clear_flags(struct crypto_ahash *tfm, u32 flags) -{ - crypto_tfm_clear_flags(crypto_ahash_tfm(tfm), flags); -} - -static inline struct crypto_ahash *crypto_ahash_reqtfm( - struct ahash_request *req) -{ - return __crypto_ahash_cast(req->base.tfm); -} - -static inline unsigned int crypto_ahash_reqsize(struct crypto_ahash *tfm) -{ - return crypto_ahash_crt(tfm)->reqsize; -} - -static inline int crypto_ahash_setkey(struct crypto_ahash *tfm, - const u8 *key, unsigned int keylen) -{ - struct ahash_tfm *crt = crypto_ahash_crt(tfm); - - return crt->setkey(tfm, key, keylen); -} - -static inline int crypto_ahash_digest(struct ahash_request *req) -{ - struct ahash_tfm *crt = crypto_ahash_crt(crypto_ahash_reqtfm(req)); - return crt->digest(req); -} - -static inline void ahash_request_set_tfm(struct ahash_request *req, - struct crypto_ahash *tfm) -{ - req->base.tfm = crypto_ahash_tfm(tfm); -} - -static inline struct ahash_request *ahash_request_alloc( - struct crypto_ahash *tfm, gfp_t gfp) -{ - struct ahash_request *req; - - req = kmalloc(sizeof(struct ahash_request) + - crypto_ahash_reqsize(tfm), gfp); - - if (likely(req)) - ahash_request_set_tfm(req, tfm); - - return req; -} - -static inline void ahash_request_free(struct ahash_request *req) -{ - kfree(req); -} - -static inline struct ahash_request *ahash_request_cast( - struct crypto_async_request *req) -{ - return container_of(req, struct ahash_request, base); -} - -static inline void ahash_request_set_callback(struct ahash_request *req, - u32 flags, - crypto_completion_t complete, - void *data) -{ - req->base.complete = complete; - req->base.data = data; - req->base.flags = flags; -} - -static inline void ahash_request_set_crypt(struct ahash_request *req, - struct scatterlist *src, u8 *result, - unsigned int nbytes) -{ - req->src = src; - req->nbytes = nbytes; - req->result = result; -} - #endif /* _LINUX_CRYPTO_H */ -- cgit v1.2.2 From b7a39bd0afc4021e8ad2b1189e884551e147427f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 23 May 2008 18:38:49 +0100 Subject: firmware: make fw->data const In preparation for supporting firmware files linked into the static kernel, make fw->data const to ensure that users aren't modifying it (so that we can pass a pointer to the original in-kernel copy, rather than having to copy it). Signed-off-by: David Woodhouse --- include/linux/firmware.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/firmware.h b/include/linux/firmware.h index 6c7eff2ebada..88718d60153c 100644 --- a/include/linux/firmware.h +++ b/include/linux/firmware.h @@ -8,7 +8,7 @@ struct firmware { size_t size; - u8 *data; + const u8 *data; }; struct device; -- cgit v1.2.2 From 5658c769443d543728b6c5c673dffc2df8676317 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 23 May 2008 13:52:42 +0100 Subject: firmware: allow firmware files to be built into kernel image Some drivers have their own hacks to bypass the kernel's firmware loader and build their firmware into the kernel; this renders those unnecessary. Other drivers don't use the firmware loader at all, because they always want the firmware to be available. This allows them to start using the firmware loader. A third set of drivers already use the firmware loader, but can't be used without help from userspace, which sometimes requires an initrd. This allows them to work in a static kernel. Signed-off-by: David Woodhouse --- include/linux/firmware.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware.h b/include/linux/firmware.h index 88718d60153c..c8ecf5b2a207 100644 --- a/include/linux/firmware.h +++ b/include/linux/firmware.h @@ -1,7 +1,10 @@ #ifndef _LINUX_FIRMWARE_H #define _LINUX_FIRMWARE_H + #include #include +#include + #define FIRMWARE_NAME_MAX 30 #define FW_ACTION_NOHOTPLUG 0 #define FW_ACTION_HOTPLUG 1 @@ -13,6 +16,24 @@ struct firmware { struct device; +struct builtin_fw { + char *name; + void *data; + unsigned long size; +}; + +/* We have to play tricks here much like stringify() to get the + __COUNTER__ macro to be expanded as we want it */ +#define __fw_concat1(x, y) x##y +#define __fw_concat(x, y) __fw_concat1(x, y) + +#define DECLARE_BUILTIN_FIRMWARE(name, blob) \ + DECLARE_BUILTIN_FIRMWARE_SIZE(name, &(blob), sizeof(blob)) + +#define DECLARE_BUILTIN_FIRMWARE_SIZE(name, blob, size) \ + static const struct builtin_fw __fw_concat(__builtin_fw,__COUNTER__) \ + __used __section(.builtin_fw) = { name, blob, size } + #if defined(CONFIG_FW_LOADER) || (defined(CONFIG_FW_LOADER_MODULE) && defined(MODULE)) int request_firmware(const struct firmware **fw, const char *name, struct device *device); -- cgit v1.2.2 From bacfe09dd7545467965e8d8f1eab20bc62dce00d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 30 May 2008 13:57:27 +0300 Subject: ihex.h: binary representation of ihex records Some devices need their firmware as a set of {address, len, data...} records in some specific order rather than a simple blob. The normal way of doing this kind of thing is 'ihex', which is a text format and not entirely suitable for use in the kernel. This provides a binary representation which is very similar, but much more compact -- and a helper routine to skip to the next record, because the alignment constraints mean that everybody will screw it up for themselves otherwise. Also a helper function which can verify that a 'struct firmware' contains a valid set of ihex records, and that following them won't run off the end of the loaded data. Signed-off-by: David Woodhouse --- include/linux/ihex.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 include/linux/ihex.h (limited to 'include/linux') diff --git a/include/linux/ihex.h b/include/linux/ihex.h new file mode 100644 index 000000000000..df89edd890ae --- /dev/null +++ b/include/linux/ihex.h @@ -0,0 +1,50 @@ +/* + * Compact binary representation of ihex records. Some devices need their + * firmware loaded in strange orders rather than a single big blob, but + * actually parsing ihex-as-text within the kernel seems silly. Thus,... + */ + +#ifndef __LINUX_IHEX_H__ +#define __LINUX_IHEX_H__ + +#include +#include + +/* Intel HEX files actually limit the length to 256 bytes, but we have + drivers which would benefit from using separate records which are + longer than that, so we extend to 16 bits of length */ +struct ihex_binrec { + __be32 addr; + __be16 len; + uint8_t data[0]; +} __attribute__((aligned(4))); + +/* Find the next record, taking into account the 4-byte alignment */ +static inline const struct ihex_binrec * +ihex_next_binrec(const struct ihex_binrec *rec) +{ + int next = ((be16_to_cpu(rec->len) + 5) & ~3) - 2; + rec = (void *)&rec->data[next]; + + return be16_to_cpu(rec->len) ? rec : NULL; +} + +/* Check that ihex_next_binrec() won't take us off the end of the image... */ +static inline int ihex_validate_fw(const struct firmware *fw) +{ + const struct ihex_binrec *rec; + size_t ofs = 0; + + while (ofs <= fw->size - sizeof(*rec)) { + rec = (void *)&fw->data[ofs]; + + /* Zero length marks end of records */ + if (!be16_to_cpu(rec->len)) + return 0; + + /* Point to next record... */ + ofs += (sizeof(*rec) + be16_to_cpu(rec->len) + 3) & ~3; + } + return -EINVAL; +} +#endif /* __LINUX_IHEX_H__ */ -- cgit v1.2.2 From f1485f3deb89e6ae10c4d34662ec9e692855ab5d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 31 May 2008 15:20:37 +0300 Subject: ihex: request_ihex_firmware() function to load and validate firmware Provide a helper to load the file and validate it in one call, to simplify error handling in the drivers which are going to use it. Signed-off-by: David Woodhouse --- include/linux/ihex.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ihex.h b/include/linux/ihex.h index df89edd890ae..2baace2788a7 100644 --- a/include/linux/ihex.h +++ b/include/linux/ihex.h @@ -9,6 +9,7 @@ #include #include +#include /* Intel HEX files actually limit the length to 256 bytes, but we have drivers which would benefit from using separate records which are @@ -47,4 +48,27 @@ static inline int ihex_validate_fw(const struct firmware *fw) } return -EINVAL; } + +/* Request firmware and validate it so that we can trust we won't + * run off the end while reading records... */ +static inline int request_ihex_firmware(const struct firmware **fw, + const char *fw_name, + struct device *dev) +{ + const struct firmware *lfw; + int ret; + + ret = request_firmware(&lfw, fw_name, dev); + if (ret) + return ret; + ret = ihex_validate_fw(lfw); + if (ret) { + dev_err(dev, "Firmware \"%s\" not valid IHEX records\n", + fw_name); + release_firmware(lfw); + return ret; + } + *fw = lfw; + return 0; +} #endif /* __LINUX_IHEX_H__ */ -- cgit v1.2.2 From ccf9b3b83d0e56fbf20c00a08b15031ce13204a7 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Thu, 10 Jul 2008 16:55:37 -0700 Subject: xfrm: Add a XFRM_STATE_AF_UNSPEC flag to xfrm_usersa_info Add a XFRM_STATE_AF_UNSPEC flag to handle the AF_UNSPEC behavior for the selector family. Userspace applications can set this flag to leave the selector family of the xfrm_state unspecified. This can be used to to handle inter family tunnels if the selector is not set from userspace. Signed-off-by: Steffen Klassert Acked-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 2ca6bae88721..fb0c215a3051 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -339,6 +339,7 @@ struct xfrm_usersa_info { #define XFRM_STATE_NOPMTUDISC 4 #define XFRM_STATE_WILDRECV 8 #define XFRM_STATE_ICMP 16 +#define XFRM_STATE_AF_UNSPEC 32 }; struct xfrm_usersa_id { -- cgit v1.2.2 From a2bb6a3d85ef3124cd336403a95abc0540d3fbe2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 10 Jul 2008 20:58:15 -0400 Subject: ftrace: add ftrace_kill_atomic It has been suggested that I add a way to disable the function tracer on an oops. This code adds a ftrace_kill_atomic. It is not meant to be used in normal situations. It will disable the ftrace tracer, but will not perform the nice shutdown that requires scheduling. Signed-off-by: Steven Rostedt Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 3121b95443d9..f368d041e02d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -89,6 +89,7 @@ void ftrace_enable_daemon(void); /* totally disable ftrace - can not re-enable after this */ void ftrace_kill(void); +void ftrace_kill_atomic(void); static inline void tracer_disable(void) { -- cgit v1.2.2 From af52a90a14cdaa54ecbfb6e6982abb13466a4b56 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 7 Jul 2008 14:16:52 -0400 Subject: sched_clock: stop maximum check on NO HZ Working with ftrace I would get large jumps of 11 millisecs or more with the clock tracer. This killed the latencing timings of ftrace and also caused the irqoff self tests to fail. What was happening is with NO_HZ the idle would stop the jiffy counter and before the jiffy counter was updated the sched_clock would have a bad delta jiffies to compare with the gtod with the maximum. The jiffies would stop and the last sched_tick would record the last gtod. On wakeup, the sched clock update would compare the gtod + delta jiffies (which would be zero) and compare it to the TSC. The TSC would have correctly (with a stable TSC) moved forward several jiffies. But because the jiffies has not been updated yet the clock would be prevented from moving forward because it would appear that the TSC jumped too far ahead. The clock would then virtually stop, until the jiffies are updated. Then the next sched clock update would see that the clock was very much behind since the delta jiffies is now correct. This would then jump the clock forward by several jiffies. This caused ftrace to report several milliseconds of interrupts off latency at every resume from NO_HZ idle. This patch adds hooks into the nohz code to disable the checking of the maximum clock update when nohz is in effect. It resumes the max check when nohz has updated the jiffies again. Signed-off-by: Steven Rostedt Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/sched.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index c5d3f847ca8d..33a8f42041fa 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1573,13 +1573,28 @@ static inline void sched_clock_idle_sleep_event(void) static inline void sched_clock_idle_wakeup_event(u64 delta_ns) { } -#else + +#ifdef CONFIG_NO_HZ +static inline void sched_clock_tick_stop(int cpu) +{ +} + +static inline void sched_clock_tick_start(int cpu) +{ +} +#endif + +#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ extern void sched_clock_init(void); extern u64 sched_clock_cpu(int cpu); extern void sched_clock_tick(void); extern void sched_clock_idle_sleep_event(void); extern void sched_clock_idle_wakeup_event(u64 delta_ns); +#ifdef CONFIG_NO_HZ +extern void sched_clock_tick_stop(int cpu); +extern void sched_clock_tick_start(int cpu); #endif +#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ /* * For kernel-internal use: high-speed (but slightly incorrect) per-cpu -- cgit v1.2.2 From 736603ab297506f4396cb5af592004499950fcfd Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 11 Jul 2008 19:27:31 -0400 Subject: jbd2: Add commit time into the commit block Carlo Wood has demonstrated that it's possible to recover deleted files from the journal. Something that will make this easier is if we can put the time of the commit into commit block. Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index d147f0f90360..ec9cadf58227 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -168,6 +168,8 @@ struct commit_header { unsigned char h_chksum_size; unsigned char h_padding[2]; __be32 h_chksum[JBD2_CHECKSUM_BYTES]; + __be64 h_commit_sec; + __be32 h_commit_nsec; }; /* -- cgit v1.2.2 From f4c0a0fdfae708f7aa438c27a380ed4071294e11 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 11 Jul 2008 19:27:31 -0400 Subject: vfs: export filemap_fdatawrite_range() Make filemap_fdatawrite_range() function public, so that it can later be used in ordered mode rewrite by JBD/JBD2. Signed-off-by: Jan Kara --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d8e2762ed14d..97f992adc62d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1740,6 +1740,8 @@ extern int wait_on_page_writeback_range(struct address_space *mapping, pgoff_t start, pgoff_t end); extern int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end, int sync_mode); +extern int filemap_fdatawrite_range(struct address_space *mapping, + loff_t start, loff_t end); extern long do_fsync(struct file *file, int datasync); extern void sync_supers(void); -- cgit v1.2.2 From c851ed540173736e60d48b53b91a16ea5c903896 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 11 Jul 2008 19:27:31 -0400 Subject: jbd2: Implement data=ordered mode handling via inodes This patch adds necessary framework into JBD2 to be able to track inodes with each transaction and write-out their dirty data during transaction commit time. This new ordered mode brings all sorts of advantages such as possibility to get rid of journal heads and buffer heads for data buffers in ordered mode, better ordering of writes on transaction commit, simplification of some JBD code, no more anonymous pages when truncate of data being committed happens. Also with this new ordered mode, delayed allocation on ordered mode is much simpler. Signed-off-by: Jan Kara --- include/linux/jbd2.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index ec9cadf58227..622c3d8ca4ed 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -381,6 +381,38 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) bit_spin_unlock(BH_JournalHead, &bh->b_state); } +/* Flags in jbd_inode->i_flags */ +#define __JI_COMMIT_RUNNING 0 +/* Commit of the inode data in progress. We use this flag to protect us from + * concurrent deletion of inode. We cannot use reference to inode for this + * since we cannot afford doing last iput() on behalf of kjournald + */ +#define JI_COMMIT_RUNNING (1 << __JI_COMMIT_RUNNING) + +/** + * struct jbd_inode is the structure linking inodes in ordered mode + * present in a transaction so that we can sync them during commit. + */ +struct jbd2_inode { + /* Which transaction does this inode belong to? Either the running + * transaction or the committing one. [j_list_lock] */ + transaction_t *i_transaction; + + /* Pointer to the running transaction modifying inode's data in case + * there is already a committing transaction touching it. [j_list_lock] */ + transaction_t *i_next_transaction; + + /* List of inodes in the i_transaction [j_list_lock] */ + struct list_head i_list; + + /* VFS inode this inode belongs to [constant during the lifetime + * of the structure] */ + struct inode *i_vfs_inode; + + /* Flags of inode [j_list_lock] */ + unsigned int i_flags; +}; + struct jbd2_revoke_table_s; /** @@ -566,6 +598,12 @@ struct transaction_s */ struct journal_head *t_log_list; + /* + * List of inodes whose data we've modified in data=ordered mode. + * [j_list_lock] + */ + struct list_head t_inode_list; + /* * Protects info related to handles */ @@ -1046,6 +1084,10 @@ extern void jbd2_journal_ack_err (journal_t *); extern int jbd2_journal_clear_err (journal_t *); extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *); extern int jbd2_journal_force_commit(journal_t *); +extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode); +extern int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, loff_t new_size); +extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode); +extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode); /* * journal_head management -- cgit v1.2.2 From 87c89c232c8f7b3820c33c3b9bc803e9358027da Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 11 Jul 2008 19:27:31 -0400 Subject: jbd2: Remove data=ordered mode support using jbd buffer heads Signed-off-by: Jan Kara --- include/linux/jbd2.h | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 622c3d8ca4ed..3dd209007098 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -542,24 +542,12 @@ struct transaction_s */ struct journal_head *t_reserved_list; - /* - * Doubly-linked circular list of all buffers under writeout during - * commit [j_list_lock] - */ - struct journal_head *t_locked_list; - /* * Doubly-linked circular list of all metadata buffers owned by this * transaction [j_list_lock] */ struct journal_head *t_buffers; - /* - * Doubly-linked circular list of all data buffers still to be - * flushed before this transaction can be committed [j_list_lock] - */ - struct journal_head *t_sync_datalist; - /* * Doubly-linked circular list of all forget buffers (superseded * buffers which we can un-checkpoint once this transaction commits) @@ -1044,7 +1032,6 @@ extern int jbd2_journal_extend (handle_t *, int nblocks); extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *); extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *); extern int jbd2_journal_get_undo_access(handle_t *, struct buffer_head *); -extern int jbd2_journal_dirty_data (handle_t *, struct buffer_head *); extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); extern void jbd2_journal_release_buffer (handle_t *, struct buffer_head *); extern int jbd2_journal_forget (handle_t *, struct buffer_head *); @@ -1223,15 +1210,13 @@ static inline int jbd_space_needed(journal_t *journal) /* journaling buffer types */ #define BJ_None 0 /* Not journaled */ -#define BJ_SyncData 1 /* Normal data: flush before commit */ -#define BJ_Metadata 2 /* Normal journaled metadata */ -#define BJ_Forget 3 /* Buffer superseded by this transaction */ -#define BJ_IO 4 /* Buffer is for temporary IO use */ -#define BJ_Shadow 5 /* Buffer contents being shadowed to the log */ -#define BJ_LogCtl 6 /* Buffer contains log descriptors */ -#define BJ_Reserved 7 /* Buffer is reserved for access by journal */ -#define BJ_Locked 8 /* Locked for I/O during commit */ -#define BJ_Types 9 +#define BJ_Metadata 1 /* Normal journaled metadata */ +#define BJ_Forget 2 /* Buffer superseded by this transaction */ +#define BJ_IO 3 /* Buffer is for temporary IO use */ +#define BJ_Shadow 4 /* Buffer contents being shadowed to the log */ +#define BJ_LogCtl 5 /* Buffer contains log descriptors */ +#define BJ_Reserved 6 /* Buffer is reserved for access by journal */ +#define BJ_Types 7 extern int jbd_blocks_per_page(struct inode *inode); -- cgit v1.2.2 From 29a814d2ee0e43c2980f33f91c1311ec06c0aa35 Mon Sep 17 00:00:00 2001 From: Alex Tomas Date: Fri, 11 Jul 2008 19:27:31 -0400 Subject: vfs: add hooks for ext4's delayed allocation support Export mpage_bio_submit() and __mpage_writepage() for the benefit of ext4's delayed allocation support. Also change __block_write_full_page so that if buffers that have the BH_Delay flag set it will call get_block() to get the physical block allocated, just as in the !BH_Mapped case. Signed-off-by: Alex Tomas Signed-off-by: "Theodore Ts'o" --- include/linux/mpage.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mpage.h b/include/linux/mpage.h index 068a0c9946af..5c42821da2d1 100644 --- a/include/linux/mpage.h +++ b/include/linux/mpage.h @@ -11,11 +11,21 @@ */ #ifdef CONFIG_BLOCK +struct mpage_data { + struct bio *bio; + sector_t last_block_in_bio; + get_block_t *get_block; + unsigned use_writepage; +}; + struct writeback_control; +struct bio *mpage_bio_submit(int rw, struct bio *bio); int mpage_readpages(struct address_space *mapping, struct list_head *pages, unsigned nr_pages, get_block_t get_block); int mpage_readpage(struct page *page, get_block_t get_block); +int __mpage_writepage(struct page *page, struct writeback_control *wbc, + void *data); int mpage_writepages(struct address_space *mapping, struct writeback_control *wbc, get_block_t get_block); int mpage_writepage(struct page *page, get_block_t *get_block, -- cgit v1.2.2 From e8ced39d5e8911c662d4d69a342b9d053eaaac4e Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Fri, 11 Jul 2008 19:27:31 -0400 Subject: percpu_counter: new function percpu_counter_sum_and_set Delayed allocation need to check free blocks at every write time. percpu_counter_read_positive() is not quit accurate. delayed allocation need a more accurate accounting, but using percpu_counter_sum_positive() is frequently is quite expensive. This patch added a new function to update center counter when sum per-cpu counter, to increase the accurate rate for next percpu_counter_read() and require less calling expensive percpu_counter_sum(). Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- include/linux/percpu_counter.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 9007ccdfc112..208388835357 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -35,7 +35,7 @@ int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount); void percpu_counter_destroy(struct percpu_counter *fbc); void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); -s64 __percpu_counter_sum(struct percpu_counter *fbc); +s64 __percpu_counter_sum(struct percpu_counter *fbc, int set); static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { @@ -44,13 +44,19 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) { - s64 ret = __percpu_counter_sum(fbc); + s64 ret = __percpu_counter_sum(fbc, 0); return ret < 0 ? 0 : ret; } +static inline s64 percpu_counter_sum_and_set(struct percpu_counter *fbc) +{ + return __percpu_counter_sum(fbc, 1); +} + + static inline s64 percpu_counter_sum(struct percpu_counter *fbc) { - return __percpu_counter_sum(fbc); + return __percpu_counter_sum(fbc, 0); } static inline s64 percpu_counter_read(struct percpu_counter *fbc) -- cgit v1.2.2 From 06d6cf6959d22037fcec598f4f954db5db3d7356 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 11 Jul 2008 19:27:31 -0400 Subject: mm: Add range_cont mode for writeback Filesystems like ext4 needs to start a new transaction in the writepages for block allocation. This happens with delayed allocation and there is limit to how many credits we can request from the journal layer. So we call write_cache_pages multiple times with wbc->nr_to_write set to the maximum possible value limitted by the max journal credits available. Add a new mode to writeback that enables us to handle this behaviour. In the new mode we update the wbc->range_start to point to the new offset to be written. Next call to call to write_cache_pages will start writeout from specified range_start offset. In the new mode we also limit writing to the specified wbc->range_end. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Mingming Cao Acked-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- include/linux/writeback.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index f462439cc288..0d8573e6b9ec 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -63,6 +63,7 @@ struct writeback_control { unsigned for_writepages:1; /* This is a writepages() call */ unsigned range_cyclic:1; /* range_start is cyclic */ unsigned more_io:1; /* more io to be dispatched */ + unsigned range_cont:1; }; /* -- cgit v1.2.2 From 006ebb40d3d65338bd74abb03b945f8d60e362bd Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Mon, 19 May 2008 08:32:49 -0400 Subject: Security: split proc ptrace checking into read vs. attach Enable security modules to distinguish reading of process state via proc from full ptrace access by renaming ptrace_may_attach to ptrace_may_access and adding a mode argument indicating whether only read access or full attach access is requested. This allows security modules to permit access to reading process state without granting full ptrace access. The base DAC/capability checking remains unchanged. Read access to /proc/pid/mem continues to apply a full ptrace attach check since check_mem_permission() already requires the current task to already be ptracing the target. The other ptrace checks within proc for elements like environ, maps, and fds are changed to pass the read mode instead of attach. In the SELinux case, we model such reading of process state as a reading of a proc file labeled with the target process' label. This enables SELinux policy to permit such reading of process state without permitting control or manipulation of the target process, as there are a number of cases where programs probe for such information via proc but do not need to be able to control the target (e.g. procps, lsof, PolicyKit, ConsoleKit). At present we have to choose between allowing full ptrace in policy (more permissive than required/desired) or breaking functionality (or in some cases just silencing the denials via dontaudit rules but this can hide genuine attacks). This version of the patch incorporates comments from Casey Schaufler (change/replace existing ptrace_may_attach interface, pass access mode), and Chris Wright (provide greater consistency in the checking). Note that like their predecessors __ptrace_may_attach and ptrace_may_attach, the __ptrace_may_access and ptrace_may_access interfaces use different return value conventions from each other (0 or -errno vs. 1 or 0). I retained this difference to avoid any changes to the caller logic but made the difference clearer by changing the latter interface to return a bool rather than an int and by adding a comment about it to ptrace.h for any future callers. Signed-off-by: Stephen Smalley Acked-by: Chris Wright Signed-off-by: James Morris --- include/linux/ptrace.h | 8 ++++++-- include/linux/security.h | 16 +++++++++++----- 2 files changed, 17 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index f98501ba557e..c6f5f9dd0cee 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -95,8 +95,12 @@ extern void __ptrace_link(struct task_struct *child, struct task_struct *new_parent); extern void __ptrace_unlink(struct task_struct *child); extern void ptrace_untrace(struct task_struct *child); -extern int ptrace_may_attach(struct task_struct *task); -extern int __ptrace_may_attach(struct task_struct *task); +#define PTRACE_MODE_READ 1 +#define PTRACE_MODE_ATTACH 2 +/* Returns 0 on success, -errno on denial. */ +extern int __ptrace_may_access(struct task_struct *task, unsigned int mode); +/* Returns true on success, false on denial. */ +extern bool ptrace_may_access(struct task_struct *task, unsigned int mode); static inline int ptrace_reparented(struct task_struct *child) { diff --git a/include/linux/security.h b/include/linux/security.h index 50737c70e78e..62bd80cb7f87 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -46,7 +46,8 @@ struct audit_krule; */ extern int cap_capable(struct task_struct *tsk, int cap); extern int cap_settime(struct timespec *ts, struct timezone *tz); -extern int cap_ptrace(struct task_struct *parent, struct task_struct *child); +extern int cap_ptrace(struct task_struct *parent, struct task_struct *child, + unsigned int mode); extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); extern int cap_capset_check(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); extern void cap_capset_set(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); @@ -1170,6 +1171,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * attributes would be changed by the execve. * @parent contains the task_struct structure for parent process. * @child contains the task_struct structure for child process. + * @mode contains the PTRACE_MODE flags indicating the form of access. * Return 0 if permission is granted. * @capget: * Get the @effective, @inheritable, and @permitted capability sets for @@ -1295,7 +1297,8 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) struct security_operations { char name[SECURITY_NAME_MAX + 1]; - int (*ptrace) (struct task_struct *parent, struct task_struct *child); + int (*ptrace) (struct task_struct *parent, struct task_struct *child, + unsigned int mode); int (*capget) (struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); @@ -1573,7 +1576,8 @@ extern struct dentry *securityfs_create_dir(const char *name, struct dentry *par extern void securityfs_remove(struct dentry *dentry); /* Security operations */ -int security_ptrace(struct task_struct *parent, struct task_struct *child); +int security_ptrace(struct task_struct *parent, struct task_struct *child, + unsigned int mode); int security_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, @@ -1755,9 +1759,11 @@ static inline int security_init(void) return 0; } -static inline int security_ptrace(struct task_struct *parent, struct task_struct *child) +static inline int security_ptrace(struct task_struct *parent, + struct task_struct *child, + unsigned int mode) { - return cap_ptrace(parent, child); + return cap_ptrace(parent, child, mode); } static inline int security_capget(struct task_struct *target, -- cgit v1.2.2 From 2069f457848f846cb31149c9aa29b330a6b66d1b Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 4 Jul 2008 09:47:13 +1000 Subject: LSM/SELinux: show LSM mount options in /proc/mounts This patch causes SELinux mount options to show up in /proc/mounts. As with other code in the area seq_put errors are ignored. Other LSM's will not have their mount options displayed until they fill in their own security_sb_show_options() function. Signed-off-by: Eric Paris Signed-off-by: Miklos Szeredi Signed-off-by: James Morris --- include/linux/security.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 62bd80cb7f87..c8ad8ec684b4 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -80,6 +80,7 @@ struct xfrm_selector; struct xfrm_policy; struct xfrm_state; struct xfrm_user_sec_ctx; +struct seq_file; extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb); extern int cap_netlink_recv(struct sk_buff *skb, int cap); @@ -1331,6 +1332,7 @@ struct security_operations { void (*sb_free_security) (struct super_block *sb); int (*sb_copy_data) (char *orig, char *copy); int (*sb_kern_mount) (struct super_block *sb, void *data); + int (*sb_show_options) (struct seq_file *m, struct super_block *sb); int (*sb_statfs) (struct dentry *dentry); int (*sb_mount) (char *dev_name, struct path *path, char *type, unsigned long flags, void *data); @@ -1610,6 +1612,7 @@ int security_sb_alloc(struct super_block *sb); void security_sb_free(struct super_block *sb); int security_sb_copy_data(char *orig, char *copy); int security_sb_kern_mount(struct super_block *sb, void *data); +int security_sb_show_options(struct seq_file *m, struct super_block *sb); int security_sb_statfs(struct dentry *dentry); int security_sb_mount(char *dev_name, struct path *path, char *type, unsigned long flags, void *data); @@ -1887,6 +1890,12 @@ static inline int security_sb_kern_mount(struct super_block *sb, void *data) return 0; } +static inline int security_sb_show_options(struct seq_file *m, + struct super_block *sb) +{ + return 0; +} + static inline int security_sb_statfs(struct dentry *dentry) { return 0; -- cgit v1.2.2 From b478a9f9889c81e88077d1495daadee64c0af541 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 3 Jul 2008 20:56:04 +0200 Subject: security: remove unused sb_get_mnt_opts hook The sb_get_mnt_opts() hook is unused, and is superseded by the sb_show_options() hook. Signed-off-by: Miklos Szeredi Acked-by: James Morris --- include/linux/security.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index c8ad8ec684b4..43c6357568a3 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -291,10 +291,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Update module state after a successful pivot. * @old_path contains the path for the old root. * @new_path contains the path for the new root. - * @sb_get_mnt_opts: - * Get the security relevant mount options used for a superblock - * @sb the superblock to get security mount options from - * @opts binary data structure containing all lsm mount data * @sb_set_mnt_opts: * Set the security relevant mount options used for a superblock * @sb the superblock to set security mount options for @@ -1348,8 +1344,6 @@ struct security_operations { struct path *new_path); void (*sb_post_pivotroot) (struct path *old_path, struct path *new_path); - int (*sb_get_mnt_opts) (const struct super_block *sb, - struct security_mnt_opts *opts); int (*sb_set_mnt_opts) (struct super_block *sb, struct security_mnt_opts *opts); void (*sb_clone_mnt_opts) (const struct super_block *oldsb, @@ -1624,8 +1618,6 @@ void security_sb_post_remount(struct vfsmount *mnt, unsigned long flags, void *d void security_sb_post_addmount(struct vfsmount *mnt, struct path *mountpoint); int security_sb_pivotroot(struct path *old_path, struct path *new_path); void security_sb_post_pivotroot(struct path *old_path, struct path *new_path); -int security_sb_get_mnt_opts(const struct super_block *sb, - struct security_mnt_opts *opts); int security_sb_set_mnt_opts(struct super_block *sb, struct security_mnt_opts *opts); void security_sb_clone_mnt_opts(const struct super_block *oldsb, struct super_block *newsb); @@ -1942,12 +1934,6 @@ static inline int security_sb_pivotroot(struct path *old_path, static inline void security_sb_post_pivotroot(struct path *old_path, struct path *new_path) { } -static inline int security_sb_get_mnt_opts(const struct super_block *sb, - struct security_mnt_opts *opts) -{ - security_init_mnt_opts(opts); - return 0; -} static inline int security_sb_set_mnt_opts(struct super_block *sb, struct security_mnt_opts *opts) -- cgit v1.2.2 From 6f0f0fd496333777d53daff21a4e3b28c4d03a6d Mon Sep 17 00:00:00 2001 From: James Morris Date: Thu, 10 Jul 2008 17:02:07 +0900 Subject: security: remove register_security hook The register security hook is no longer required, as the capability module is always registered. LSMs wishing to stack capability as a secondary module should do so explicitly. Signed-off-by: James Morris Acked-by: Stephen Smalley Acked-by: Greg Kroah-Hartman --- include/linux/security.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 43c6357568a3..31c8851ec5d0 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1239,11 +1239,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @pages contains the number of pages. * Return 0 if permission is granted. * - * @register_security: - * allow module stacking. - * @name contains the name of the security module being stacked. - * @ops contains a pointer to the struct security_operations of the module to stack. - * * @secid_to_secctx: * Convert secid to security context. * @secid contains the security ID. @@ -1471,10 +1466,6 @@ struct security_operations { int (*netlink_send) (struct sock *sk, struct sk_buff *skb); int (*netlink_recv) (struct sk_buff *skb, int cap); - /* allow module stacking */ - int (*register_security) (const char *name, - struct security_operations *ops); - void (*d_instantiate) (struct dentry *dentry, struct inode *inode); int (*getprocattr) (struct task_struct *p, char *name, char **value); @@ -1564,7 +1555,6 @@ struct security_operations { extern int security_init(void); extern int security_module_enable(struct security_operations *ops); extern int register_security(struct security_operations *ops); -extern int mod_reg_security(const char *name, struct security_operations *ops); extern struct dentry *securityfs_create_file(const char *name, mode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); -- cgit v1.2.2 From 7e9db9eaefdb8798730790214ff1b7746006ec98 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Mon, 14 Jul 2008 09:58:44 +0200 Subject: [S390] cio: Introduce modalias for css bus. Add modalias and subchannel type attributes for all subchannels. I/O subchannel specific attributes are now created in io_subchannel_probe(). modalias and subchannel type are also added to the uevent for the css bus. Also make the css modalias known. Signed-off-by: Cornelia Huck Signed-off-by: Martin Schwidefsky Signed-off-by: Heiko Carstens --- include/linux/mod_devicetable.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 69b2342d5ebb..1fd03e732e07 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -159,6 +159,15 @@ struct ap_device_id { #define AP_DEVICE_ID_MATCH_DEVICE_TYPE 0x01 +/* s390 css bus devices (subchannels) */ +struct css_device_id { + __u8 type; /* subchannel type */ + __u8 pad1; + __u16 pad2; + __u32 pad3; + kernel_ulong_t driver_data; +}; + #define ACPI_ID_LEN 16 /* only 9 bytes needed here, 16 bytes are used */ /* to workaround crosscompile issues */ -- cgit v1.2.2 From f08adc008d84f6b03d377ede951e29ed169e76e2 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Mon, 14 Jul 2008 09:59:03 +0200 Subject: [S390] css: Use css_device_id for bus matching. css_device_id exists, so use it for determining the right driver (and add a match_flags which is always 1 for valid types). Signed-off-by: Cornelia Huck Signed-off-by: Martin Schwidefsky Signed-off-by: Heiko Carstens --- include/linux/mod_devicetable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 1fd03e732e07..c4db5827963d 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -161,8 +161,8 @@ struct ap_device_id { /* s390 css bus devices (subchannels) */ struct css_device_id { + __u8 match_flags; __u8 type; /* subchannel type */ - __u8 pad1; __u16 pad2; __u32 pad3; kernel_ulong_t driver_data; -- cgit v1.2.2 From 341c2c958ec7bdd9f54733a8b0b432fe76842a82 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 20 May 2008 02:17:51 +0900 Subject: libata: consistently use msecs for time durations libata has been using mix of jiffies and msecs for time druations. This is getting confusing. As writing sub HZ values in jiffies is PITA and msecs_to_jiffies() can't be used as initializer, unify unit for all time durations to msecs. So, durations are in msecs and deadlines are in jiffies. ata_deadline() is added to compute deadline from a start time and duration in msecs. While at it, drop now superflous _msec suffix from arguments and rename @timeout to @deadline if it represents a fixed point in time rather than duration. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index e57e5d08312d..94110b652b30 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -27,6 +27,7 @@ #define __LINUX_LIBATA_H__ #include +#include #include #include #include @@ -115,7 +116,7 @@ enum { /* tag ATA_MAX_QUEUE - 1 is reserved for internal commands */ ATA_MAX_QUEUE = 32, ATA_TAG_INTERNAL = ATA_MAX_QUEUE - 1, - ATA_SHORT_PAUSE = (HZ >> 6) + 1, + ATA_SHORT_PAUSE = 16, ATAPI_MAX_DRAIN = 16 << 10, @@ -234,17 +235,17 @@ enum { /* bits 24:31 of host->flags are reserved for LLD specific flags */ /* various lengths of time */ - ATA_TMOUT_BOOT = 30 * HZ, /* heuristic */ - ATA_TMOUT_BOOT_QUICK = 7 * HZ, /* heuristic */ - ATA_TMOUT_INTERNAL = 30 * HZ, - ATA_TMOUT_INTERNAL_QUICK = 5 * HZ, + ATA_TMOUT_BOOT = 30000, /* heuristic */ + ATA_TMOUT_BOOT_QUICK = 7000, /* heuristic */ + ATA_TMOUT_INTERNAL = 30000, + ATA_TMOUT_INTERNAL_QUICK = 5000, /* FIXME: GoVault needs 2s but we can't afford that without * parallel probing. 800ms is enough for iVDR disk * HHD424020F7SV00. Increase to 2secs when parallel probing * is in place. */ - ATA_TMOUT_FF_WAIT = 4 * HZ / 5, + ATA_TMOUT_FF_WAIT = 800, /* Spec mandates to wait for ">= 2ms" before checking status * after reset. We wait 150ms, because that was the magic @@ -256,14 +257,14 @@ enum { * * Old drivers/ide uses the 2mS rule and then waits for ready. */ - ATA_WAIT_AFTER_RESET_MSECS = 150, + ATA_WAIT_AFTER_RESET = 150, /* If PMP is supported, we have to do follow-up SRST. As some * PMPs don't send D2H Reg FIS after hardreset, LLDs are * advised to wait only for the following duration before * doing SRST. */ - ATA_TMOUT_PMP_SRST_WAIT = 1 * HZ, + ATA_TMOUT_PMP_SRST_WAIT = 1000, /* ATA bus states */ BUS_UNKNOWN = 0, @@ -895,8 +896,7 @@ extern void ata_host_resume(struct ata_host *host); #endif extern int ata_ratelimit(void); extern u32 ata_wait_register(void __iomem *reg, u32 mask, u32 val, - unsigned long interval_msec, - unsigned long timeout_msec); + unsigned long interval, unsigned long timeout); extern int atapi_cmd_type(u8 opcode); extern void ata_tf_to_fis(const struct ata_taskfile *tf, u8 pmp, int is_cmd, u8 *fis); @@ -1389,6 +1389,12 @@ static inline int ata_check_ready(u8 status) return 0; } +static inline unsigned long ata_deadline(unsigned long from_jiffies, + unsigned long timeout_msecs) +{ + return from_jiffies + msecs_to_jiffies(timeout_msecs); +} + /************************************************************************** * PMP - drivers/ata/libata-pmp.c -- cgit v1.2.2 From 0a2c0f56159999e20015241d3b8fa89b1ab14309 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 20 May 2008 02:17:52 +0900 Subject: libata: improve EH retry delay handling EH retries were delayed by 5 seconds to ensure that resets don't occur back-to-back. However, this 5 second delay is superflous or excessive in many cases. For example, after IDENTIFY times out, there's no reason to wait five more seconds before retrying. This patch adds ehc->last_reset timestamp and record the timestamp for the last reset trial or success and uses it to space resets by ATA_EH_RESET_COOL_DOWN which is 5 secs and removes unconditional 5 sec sleeps. As this change makes inter-try waits often shorter and they're redundant in nature, this patch also removes the "retrying..." messages. While at it, convert explicit rounding up division to DIV_ROUND_UP(). This change speeds up EH in many cases w/o sacrificing robustness. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 94110b652b30..9058c2a325a9 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -602,6 +602,8 @@ struct ata_eh_context { unsigned int did_probe_mask; unsigned int saved_ncq_enabled; u8 saved_xfer_mode[ATA_MAX_DEVICES]; + /* timestamp for the last reset attempt or success */ + unsigned long last_reset; }; struct ata_acpi_drive -- cgit v1.2.2 From 87fbc5a060faf2394bee88a93519f9b9d434727c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 20 May 2008 02:17:54 +0900 Subject: libata: improve EH internal command timeout handling ATA_TMOUT_INTERNAL which was 30secs were used for all internal commands which is way too long when something goes wrong. This patch implements command type based stepped timeouts. Different command types can use different timeouts and each command type can use different timeout values after timeouts. ie. the initial timeout is set to a value which should cover most of the cases but not too long so that run away cases don't delay things too much. After the first try times out, the second try can use longer timeout and if that one times out too, it can go for full 30sec timeout. IDENTIFYs use 5s - 10s - 30s timeout and all other commands use 5s - 10s timeouts. This patch significantly cuts down the needed time to handle failure cases while still allowing libata to work with nut job devices through retries. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 9058c2a325a9..035f8e1cd0ac 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -237,7 +237,6 @@ enum { /* various lengths of time */ ATA_TMOUT_BOOT = 30000, /* heuristic */ ATA_TMOUT_BOOT_QUICK = 7000, /* heuristic */ - ATA_TMOUT_INTERNAL = 30000, ATA_TMOUT_INTERNAL_QUICK = 5000, /* FIXME: GoVault needs 2s but we can't afford that without @@ -341,6 +340,11 @@ enum { SATA_PMP_RW_TIMEOUT = 3000, /* PMP read/write timeout */ + /* This should match the actual table size of + * ata_eh_cmd_timeout_table in libata-eh.c. + */ + ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 5, + /* Horkage types. May be set by libata or controller on drives (some horkage may be drive/controller pair dependant */ @@ -598,6 +602,8 @@ struct ata_eh_info { struct ata_eh_context { struct ata_eh_info i; int tries[ATA_MAX_DEVICES]; + int cmd_timeout_idx[ATA_MAX_DEVICES] + [ATA_EH_CMD_TIMEOUT_TABLE_SIZE]; unsigned int classes[ATA_MAX_DEVICES]; unsigned int did_probe_mask; unsigned int saved_ncq_enabled; -- cgit v1.2.2 From 18f7ba4c2f4be6b37d925931f04d6cc28d88d1ee Mon Sep 17 00:00:00 2001 From: Kristen Carlson Accardi Date: Tue, 3 Jun 2008 10:33:55 -0700 Subject: libata/ahci: enclosure management support Add Enclosure Management support to libata and ahci. Signed-off-by: Kristen Carlson Accardi Signed-off-by: Jeff Garzik --- include/linux/libata.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 035f8e1cd0ac..5b247b8a6b3b 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -169,6 +169,7 @@ enum { ATA_LFLAG_ASSUME_CLASS = ATA_LFLAG_ASSUME_ATA | ATA_LFLAG_ASSUME_SEMB, ATA_LFLAG_NO_RETRY = (1 << 5), /* don't retry this link */ ATA_LFLAG_DISABLED = (1 << 6), /* link is disabled */ + ATA_LFLAG_SW_ACTIVITY = (1 << 7), /* keep activity stats */ /* struct ata_port flags */ ATA_FLAG_SLAVE_POSS = (1 << 0), /* host supports slave dev */ @@ -191,6 +192,10 @@ enum { ATA_FLAG_AN = (1 << 18), /* controller supports AN */ ATA_FLAG_PMP = (1 << 19), /* controller supports PMP */ ATA_FLAG_IPM = (1 << 20), /* driver can handle IPM */ + ATA_FLAG_EM = (1 << 21), /* driver supports enclosure + * management */ + ATA_FLAG_SW_ACTIVITY = (1 << 22), /* driver supports sw activity + * led */ /* The following flag belongs to ap->pflags but is kept in * ap->flags because it's referenced in many LLDs and will be @@ -446,6 +451,15 @@ enum link_pm { MEDIUM_POWER, }; extern struct device_attribute dev_attr_link_power_management_policy; +extern struct device_attribute dev_attr_em_message_type; +extern struct device_attribute dev_attr_em_message; +extern struct device_attribute dev_attr_sw_activity; + +enum sw_activity { + OFF, + BLINK_ON, + BLINK_OFF, +}; #ifdef CONFIG_ATA_SFF struct ata_ioports { @@ -701,6 +715,7 @@ struct ata_port { struct timer_list fastdrain_timer; unsigned long fastdrain_cnt; + int em_message_type; void *private_data; #ifdef CONFIG_ATA_ACPI @@ -792,6 +807,12 @@ struct ata_port_operations { u8 (*bmdma_status)(struct ata_port *ap); #endif /* CONFIG_ATA_SFF */ + ssize_t (*em_show)(struct ata_port *ap, char *buf); + ssize_t (*em_store)(struct ata_port *ap, const char *message, + size_t size); + ssize_t (*sw_activity_show)(struct ata_device *dev, char *buf); + ssize_t (*sw_activity_store)(struct ata_device *dev, + enum sw_activity val); /* * Obsolete */ -- cgit v1.2.2 From 20a9b6e7c303f2a6f9afe17c0997bc9a3c734442 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 14 Jul 2008 22:38:22 +0200 Subject: i2c: Remove 3 deprecated bus drivers This patch contains the scheduled removal of i2c-i810, i2c-prosavage and i2c-savage4. Signed-off-by: Adrian Bunk Signed-off-by: Jean Delvare --- include/linux/i2c-id.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h index 580acc93903e..988e566d3ed5 100644 --- a/include/linux/i2c-id.h +++ b/include/linux/i2c-id.h @@ -111,7 +111,6 @@ #define I2C_HW_B_RIVA 0x010010 /* Riva based graphics cards */ #define I2C_HW_B_IOC 0x010011 /* IOC bit-wiggling */ #define I2C_HW_B_IXP2000 0x010016 /* GPIO on IXP2000 systems */ -#define I2C_HW_B_S3VIA 0x010018 /* S3Via ProSavage adapter */ #define I2C_HW_B_ZR36067 0x010019 /* Zoran-36057/36067 based boards */ #define I2C_HW_B_PCILYNX 0x01001a /* TI PCILynx I2C adapter */ #define I2C_HW_B_CX2388x 0x01001b /* connexant 2388x based tv cards */ -- cgit v1.2.2 From 67c2e66571c383404a5acd08189194da660da942 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 14 Jul 2008 22:38:23 +0200 Subject: i2c: Delete unused function i2c_smbus_write_quick Function i2c_smbus_write_quick has no users left, so we can delete it. Also update the list of these helper functions which are gone but could be added back if needed. Signed-off-by: Jean Delvare --- include/linux/i2c.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 8dc730132192..b3695f353f79 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -71,7 +71,6 @@ extern s32 i2c_smbus_xfer (struct i2c_adapter * adapter, u16 addr, /* Now follow the 'nice' access routines. These also document the calling conventions of smbus_access. */ -extern s32 i2c_smbus_write_quick(struct i2c_client * client, u8 value); extern s32 i2c_smbus_read_byte(struct i2c_client * client); extern s32 i2c_smbus_write_byte(struct i2c_client * client, u8 value); extern s32 i2c_smbus_read_byte_data(struct i2c_client * client, u8 command); -- cgit v1.2.2 From ae7193f7fa3e1735ab70807eb6e35a2a6575623f Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 14 Jul 2008 22:38:24 +0200 Subject: i2c: Update stray references to smbus_access That function is actually named i2c_smbus_xfer. Signed-off-by: Jean Delvare --- include/linux/i2c.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index b3695f353f79..7c36d5188d39 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -69,7 +69,7 @@ extern s32 i2c_smbus_xfer (struct i2c_adapter * adapter, u16 addr, union i2c_smbus_data * data); /* Now follow the 'nice' access routines. These also document the calling - conventions of smbus_access. */ + conventions of i2c_smbus_xfer. */ extern s32 i2c_smbus_read_byte(struct i2c_client * client); extern s32 i2c_smbus_write_byte(struct i2c_client * client, u8 value); @@ -536,7 +536,7 @@ union i2c_smbus_data { /* and one more for user-space compatibility */ }; -/* smbus_access read or write markers */ +/* i2c_smbus_xfer read or write markers */ #define I2C_SMBUS_READ 1 #define I2C_SMBUS_WRITE 0 -- cgit v1.2.2 From c1b6b4f2342d073698dfc2547240c35045a1d00e Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 14 Jul 2008 22:38:28 +0200 Subject: i2c: Let framebuffer drivers set their I2C bus class to DDC Let framebuffer drivers set their I2C bus class to DDC. Once this is done, we will be able to tell the eeprom driver to only probe for EDID EEPROMs on these buses. Signed-off-by: Jean Delvare --- include/linux/i2c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 7c36d5188d39..145797fe6a31 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -349,7 +349,7 @@ static inline void i2c_set_adapdata (struct i2c_adapter *dev, void *data) #define I2C_CLASS_HWMON (1<<0) /* lm_sensors, ... */ #define I2C_CLASS_TV_ANALOG (1<<1) /* bttv + friends */ #define I2C_CLASS_TV_DIGITAL (1<<2) /* dvb cards */ -#define I2C_CLASS_DDC (1<<3) /* i2c-matroxfb ? */ +#define I2C_CLASS_DDC (1<<3) /* DDC bus on graphics adapters */ #define I2C_CLASS_CAM_ANALOG (1<<4) /* camera with analog CCD */ #define I2C_CLASS_CAM_DIGITAL (1<<5) /* most webcams */ #define I2C_CLASS_SOUND (1<<6) /* sound devices */ -- cgit v1.2.2 From 3401b2fff38fbb8b73ea6bcc69a8370ae5d2a7a0 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 14 Jul 2008 22:38:29 +0200 Subject: i2c: Let bus drivers add SPD to their class Let general purpose I2C/SMBus bus drivers add SPD to their class. Once this is done, we will be able to tell the eeprom driver to only probe for SPD EEPROMs and similar on these buses. Note that I took a conservative approach here, adding I2C_CLASS_SPD to many drivers that have no idea whether they can host SPD EEPROMs or not. This is to make sure that the eeprom driver doesn't stop probing buses where SPD EEPROMs or equivalent live. So, bus driver maintainers and users should feel free to remove the SPD class from drivers those buses never have SPD EEPROMs or they don't want the eeprom driver to bind to them. Likewise, feel free to add the SPD class to any bus driver I might have missed. Signed-off-by: Jean Delvare --- include/linux/i2c.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 145797fe6a31..839d0ea3dca3 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -353,6 +353,7 @@ static inline void i2c_set_adapdata (struct i2c_adapter *dev, void *data) #define I2C_CLASS_CAM_ANALOG (1<<4) /* camera with analog CCD */ #define I2C_CLASS_CAM_DIGITAL (1<<5) /* most webcams */ #define I2C_CLASS_SOUND (1<<6) /* sound devices */ +#define I2C_CLASS_SPD (1<<7) /* SPD EEPROMs and similar */ #define I2C_CLASS_ALL (UINT_MAX) /* all of the above */ /* i2c_client_address_data is the struct for holding default client -- cgit v1.2.2 From 0573d11b2bbd0e4774f33f4c1959c1939c055e96 Mon Sep 17 00:00:00 2001 From: Eric Brower Date: Mon, 14 Jul 2008 22:38:31 +0200 Subject: i2c-algo-pcf: Multi-master lost-arbitration improvement Improve lost-arbitration handling of PCF8584. This is necessary for support of a currently out-of-kernel driver for Sun Microsystems E250 environmental management; perhaps others. Signed-off-by: Eric Brower Acked-by: Dan Smolik Signed-off-by: Jean Delvare --- include/linux/i2c-algo-pcf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c-algo-pcf.h b/include/linux/i2c-algo-pcf.h index 77afbb60fd11..74fb6f889a77 100644 --- a/include/linux/i2c-algo-pcf.h +++ b/include/linux/i2c-algo-pcf.h @@ -36,6 +36,12 @@ struct i2c_algo_pcf_data { /* local settings */ int udelay; int timeout; + + /* Multi-master lost arbitration back-off delay (msecs) + * This should be set by the bus adapter or knowledgable client + * if bus is multi-mastered, else zero + */ + unsigned long lab_mdelay; }; int i2c_pcf_add_bus(struct i2c_adapter *); -- cgit v1.2.2 From e3e7fc3c401a5d53f0599a357b3cf65d6a4f52e3 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 14 Jul 2008 22:38:31 +0200 Subject: i2c-algo-pcf: Drop unused struct members Struct members udelay and timeout aren't used anywhere, so drop them. Signed-off-by: Jean Delvare Acked-by: Eric Brower --- include/linux/i2c-algo-pcf.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c-algo-pcf.h b/include/linux/i2c-algo-pcf.h index 74fb6f889a77..0177d280f733 100644 --- a/include/linux/i2c-algo-pcf.h +++ b/include/linux/i2c-algo-pcf.h @@ -33,10 +33,6 @@ struct i2c_algo_pcf_data { int (*getclock) (void *data); void (*waitforpin) (void); - /* local settings */ - int udelay; - int timeout; - /* Multi-master lost arbitration back-off delay (msecs) * This should be set by the bus adapter or knowledgable client * if bus is multi-mastered, else zero -- cgit v1.2.2 From f6a7110520037ba786f17b53790c6eb8a3d4ef55 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 14 Jul 2008 22:38:34 +0200 Subject: i2c-dev: Delete empty detach_client callback Implementing detach_client is optional, so there is no point in an empty implementation. Likewise, i2c driver IDs are optional, and we don't need one. Signed-off-by: Jean Delvare --- include/linux/i2c-id.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h index 988e566d3ed5..ef13b7c66df3 100644 --- a/include/linux/i2c-id.h +++ b/include/linux/i2c-id.h @@ -91,8 +91,6 @@ #define I2C_DRIVERID_M52790 95 /* Mitsubishi M52790SP/FP AV switch */ #define I2C_DRIVERID_CS5345 96 /* cs5345 audio processor */ -#define I2C_DRIVERID_I2CDEV 900 - #define I2C_DRIVERID_OV7670 1048 /* Omnivision 7670 camera */ /* -- cgit v1.2.2 From e9ca9eb9d7fc7bf3dc3cec5ba7edb089c4625f7b Mon Sep 17 00:00:00 2001 From: Jon Smirl Date: Mon, 14 Jul 2008 22:38:35 +0200 Subject: i2c: Export the i2c_bus_type symbol Export the root of the i2c bus so that PowerPC device tree code can iterate over devices on the i2c bus. Signed-off-by: Jon Smirl Signed-off-by: Jean Delvare --- include/linux/i2c.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 839d0ea3dca3..50cbab4b62b0 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -35,6 +35,8 @@ #include /* for completion */ #include +extern struct bus_type i2c_bus_type; + /* --- General options ------------------------------------------------ */ struct i2c_msg; -- cgit v1.2.2 From 2b7a5056a0a7ff17d5d2004c29c852a92a6bd632 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 14 Jul 2008 22:38:35 +0200 Subject: i2c: New-style EEPROM driver using device IDs Add a new-style driver for most I2C EEPROMs, giving sysfs read/write access to their data. Tested with various chips and clock rates. Signed-off-by: Wolfram Sang Signed-off-by: Jean Delvare --- include/linux/i2c/at24.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 include/linux/i2c/at24.h (limited to 'include/linux') diff --git a/include/linux/i2c/at24.h b/include/linux/i2c/at24.h new file mode 100644 index 000000000000..f6edd522a929 --- /dev/null +++ b/include/linux/i2c/at24.h @@ -0,0 +1,28 @@ +#ifndef _LINUX_AT24_H +#define _LINUX_AT24_H + +#include + +/* + * As seen through Linux I2C, differences between the most common types of I2C + * memory include: + * - How much memory is available (usually specified in bit)? + * - What write page size does it support? + * - Special flags (16 bit addresses, read_only, world readable...)? + * + * If you set up a custom eeprom type, please double-check the parameters. + * Especially page_size needs extra care, as you risk data loss if your value + * is bigger than what the chip actually supports! + */ + +struct at24_platform_data { + u32 byte_len; /* size (sum of all addr) */ + u16 page_size; /* for writes */ + u8 flags; +#define AT24_FLAG_ADDR16 0x80 /* address pointer is 16 bit */ +#define AT24_FLAG_READONLY 0x40 /* sysfs-entry will be read-only */ +#define AT24_FLAG_IRUGO 0x20 /* sysfs-entry will be world-readable */ +#define AT24_FLAG_TAKE8ADDR 0x10 /* take always 8 addresses (24c00) */ +}; + +#endif /* _LINUX_AT24_H */ -- cgit v1.2.2 From 4735c98f8447acb1c8977e2b8024640f7bf36dd6 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 14 Jul 2008 22:38:36 +0200 Subject: i2c: Add detection capability to new-style drivers Add a mechanism to let new-style i2c drivers optionally autodetect devices they would support on selected buses and ask i2c-core to instantiate them. This is a replacement for legacy i2c drivers, much cleaner. Where drivers had to implement both a legacy i2c_driver and a new-style i2c_driver so far, this mechanism makes it possible to get rid of the legacy i2c_driver and implement both enumerated and detected device support with just one (new-style) i2c_driver. Here is a quick conversion guide for these drivers, step by step: * Delete the legacy driver definition, registration and removal. Delete the attach_adapter and detach_client methods of the legacy driver. * Change the prototype of the legacy detect function from static int foo_detect(struct i2c_adapter *adapter, int address, int kind); to static int foo_detect(struct i2c_client *client, int kind, struct i2c_board_info *info); * Set the new-style driver detect callback to this new function, and set its address_data to &addr_data (addr_data is generally provided by I2C_CLIENT_INSMOD.) * Add the appropriate class to the new-style driver. This is typically the class the legacy attach_adapter method was checking for. Class checking is now mandatory (done by i2c-core.) See for the list of available classes. * Remove the i2c_client allocation and freeing from the detect function. A pre-allocated client is now handed to you by i2c-core, and is freed automatically. * Make the detect function fill the type field of the i2c_board_info structure it was passed as a parameter, and return 0, on success. If the detection fails, return -ENODEV. Signed-off-by: Jean Delvare --- include/linux/i2c.h | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 50cbab4b62b0..08be0d21864c 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -45,6 +45,7 @@ struct i2c_adapter; struct i2c_client; struct i2c_driver; union i2c_smbus_data; +struct i2c_board_info; /* * The master routines are the ones normally used to transmit data to devices @@ -94,15 +95,33 @@ extern s32 i2c_smbus_write_i2c_block_data(struct i2c_client * client, u8 command, u8 length, const u8 *values); -/* - * A driver is capable of handling one or more physical devices present on - * I2C adapters. This information is used to inform the driver of adapter - * events. +/** + * struct i2c_driver - represent an I2C device driver + * @class: What kind of i2c device we instantiate (for detect) + * @detect: Callback for device detection + * @address_data: The I2C addresses to probe, ignore or force (for detect) + * @clients: List of detected clients we created (for i2c-core use only) * * The driver.owner field should be set to the module owner of this driver. * The driver.name field should be set to the name of this driver. + * + * For automatic device detection, both @detect and @address_data must + * be defined. @class should also be set, otherwise only devices forced + * with module parameters will be created. The detect function must + * fill at least the name field of the i2c_board_info structure it is + * handed upon successful detection, and possibly also the flags field. + * + * If @detect is missing, the driver will still work fine for enumerated + * devices. Detected devices simply won't be supported. This is expected + * for the many I2C/SMBus devices which can't be detected reliably, and + * the ones which can always be enumerated in practice. + * + * The i2c_client structure which is handed to the @detect callback is + * not a real i2c_client. It is initialized just enough so that you can + * call i2c_smbus_read_byte_data and friends on it. Don't do anything + * else with it. In particular, calling dev_dbg and friends on it is + * not allowed. */ - struct i2c_driver { int id; unsigned int class; @@ -142,6 +161,11 @@ struct i2c_driver { struct device_driver driver; const struct i2c_device_id *id_table; + + /* Device detection callback for automatic device creation */ + int (*detect)(struct i2c_client *, int kind, struct i2c_board_info *); + const struct i2c_client_address_data *address_data; + struct list_head clients; }; #define to_i2c_driver(d) container_of(d, struct i2c_driver, driver) @@ -157,6 +181,7 @@ struct i2c_driver { * @dev: Driver model device node for the slave. * @irq: indicates the IRQ generated by this device (if any) * @list: list of active/busy clients (DEPRECATED) + * @detected: member of an i2c_driver.clients list * @released: used to synchronize client releases & detaches and references * * An i2c_client identifies a single device (i.e. chip) connected to an @@ -174,6 +199,7 @@ struct i2c_client { struct device dev; /* the device structure */ int irq; /* irq issued by device */ struct list_head list; /* DEPRECATED */ + struct list_head detected; struct completion released; }; #define to_i2c_client(d) container_of(d, struct i2c_client, dev) -- cgit v1.2.2 From 521e575b9a7324a0bca762622139f69582a042bf Mon Sep 17 00:00:00 2001 From: Ron Livne Date: Mon, 14 Jul 2008 23:48:48 -0700 Subject: IB/mlx4: Add support for blocking multicast loopback packets Add support for handling the IB_QP_CREATE_MULTICAST_BLOCK_LOOPBACK flag by using the per-multicast group loopback blocking feature of mlx4 hardware. Signed-off-by: Ron Livne Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index a744383d16e9..81b3dd5206e0 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -398,7 +398,8 @@ int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_waterm int mlx4_INIT_PORT(struct mlx4_dev *dev, int port); int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port); -int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]); +int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], + int block_mcast_loopback); int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]); int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list, -- cgit v1.2.2 From 124cafc5eb973e748c4ce3dc1caad29274e64613 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 15 Jul 2008 21:21:44 +0200 Subject: ide: remove ide_init_drive_cmd ide_init_drive_cmd just calls blk_rq_init. This converts the users of ide_init_drive_cmd to use blk_rq_init directly and removes ide_init_drive_cmd. Signed-off-by: FUJITA Tomonori Cc: Borislav Petkov Cc: Jens Axboe Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index eddb6daadf4a..3261c6691759 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -857,8 +857,6 @@ int ide_wait_stat(ide_startstop_t *, ide_drive_t *, u8, u8, unsigned long); extern ide_startstop_t ide_do_reset (ide_drive_t *); -extern void ide_init_drive_cmd (struct request *rq); - /* * "action" parameter type for ide_do_drive_cmd() below. */ -- cgit v1.2.2 From 681a561b7ec7fdcd8f35b68e44ac6d6c70aecc04 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 15 Jul 2008 21:21:45 +0200 Subject: block: unexport blk_end_sync_rq All the users of blk_end_sync_rq has gone (they are converted to use blk_execute_rq). This unexports blk_end_sync_rq. Signed-off-by: FUJITA Tomonori Cc: Borislav Petkov Signed-off-by: Jens Axboe Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d2a1b71e93c3..1171abd7eb17 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -623,7 +623,6 @@ extern void generic_make_request(struct bio *bio); extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_put_request(struct request *); extern void __blk_put_request(struct request_queue *, struct request *); -extern void blk_end_sync_rq(struct request *rq, int error); extern struct request *blk_get_request(struct request_queue *, int, gfp_t); extern void blk_insert_request(struct request_queue *, struct request *, int, void *); extern void blk_requeue_request(struct request_queue *, struct request *); -- cgit v1.2.2 From 30e5ee4d1a651a0c66e86c6612c003034bd20ba2 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:46 +0200 Subject: ide: remove obsoleted "idebus=" kernel parameter * Remove obsoleted "idebus=" kernel parameter. * Remove no longer needed ide_system_bus_speed() and system_bus_clock() (together with idebus_parameter and system_bus_speed variables). Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 3261c6691759..dad535659249 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -994,8 +994,6 @@ int ide_taskfile_ioctl(ide_drive_t *, unsigned int, unsigned long); int ide_cmd_ioctl(ide_drive_t *, unsigned int, unsigned long); int ide_task_ioctl(ide_drive_t *, unsigned int, unsigned long); -extern int system_bus_clock(void); - extern int ide_driveid_update(ide_drive_t *); extern int ide_config_drive_speed(ide_drive_t *, u8); extern u8 eighty_ninty_three (ide_drive_t *); -- cgit v1.2.2 From 931ee0dc5c69e8113233d21942681ab8fecde7f9 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:47 +0200 Subject: ide: remove obsoleted "ide=" kernel parameters * Remove obsoleted "ide=" kernel parameters. * Remove no longer needed: - ide_setup() - parse_options() - __setup("", ...) - module_param(options, ...) * Use module_{init,exit}() for MODULE=y case and remove MODULE ifdef. * Make ide_*acpi* and ide_doubler variables static. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index dad535659249..0fa1812d0438 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -813,10 +813,6 @@ int generic_ide_ioctl(ide_drive_t *, struct file *, struct block_device *, unsig #ifndef _IDE_C extern ide_hwif_t ide_hwifs[]; /* master data repository */ #endif -extern int ide_noacpi; -extern int ide_acpigtf; -extern int ide_acpionboot; -extern int noautodma; extern int ide_vlb_clk; extern int ide_pci_clk; -- cgit v1.2.2 From 9a410e79b552bacb4481f85618aa7333b7776ed7 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:48 +0200 Subject: ide: remove IDE_TFLAG_NO_SELECT_MASK taskfile flag Always call SELECT_MASK(..., 0) in ide_tf_load() (needs to be done to match ide_set_irq(..., 1)) and then remove IDE_TFLAG_NO_SELECT_MASK taskfile flag. This change should only affect hpt366 and icside host drivers since ->maskproc(..., 0) for sgiioc4 is equivalent to ide_set_irq(..., 1). Cc: Sergei Shtylyov Cc: Russell King Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 0fa1812d0438..d4a910cdb907 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -869,7 +869,6 @@ extern void ide_end_drive_cmd(ide_drive_t *, u8, u8); enum { IDE_TFLAG_LBA48 = (1 << 0), - IDE_TFLAG_NO_SELECT_MASK = (1 << 1), IDE_TFLAG_FLAGGED = (1 << 2), IDE_TFLAG_OUT_DATA = (1 << 3), IDE_TFLAG_OUT_HOB_FEATURE = (1 << 4), -- cgit v1.2.2 From ed4af48fd660176680da905817f6e40d51436e4c Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:48 +0200 Subject: ide: move IRQ unmasking out from ->tf_load method Move IRQ unmasking out from ->tf_load method to its users. There should be no functional changes caused by this patch (SELECT_MASK() is NOP except for hpt366, icside and sgiioc4). Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index d4a910cdb907..56d0bc2dffee 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -973,6 +973,7 @@ typedef struct ide_task_s { void ide_tf_dump(const char *, struct ide_taskfile *); extern void SELECT_DRIVE(ide_drive_t *); +void SELECT_MASK(ide_drive_t *, int); extern int drive_is_ready(ide_drive_t *); -- cgit v1.2.2 From 135721446144af005109c25eeacca4fdddcd9a66 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:49 +0200 Subject: ide: remove ->mmio flag from ide_hwif_t Since scc_pata host driver no longer uses IDE PCI layer / ide_dma_setup() and all other ->mmio users set also IDE_HFLAG_MMIO host flag we can safely remove ->mmio flag. There should be no functional changes caused by this patch. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 56d0bc2dffee..b01b102be4de 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -532,7 +532,6 @@ typedef struct hwif_s { unsigned serialized : 1; /* serialized all channel operation */ unsigned sharing_irq: 1; /* 1 = sharing irq with another hwif */ unsigned sg_mapped : 1; /* sg_table and sg_nents are ready */ - unsigned mmio : 1; /* host uses MMIO */ struct device gendev; struct device *portdev; -- cgit v1.2.2 From f8c4bd0ab2b8783c0f080957781e9f70bee48eaa Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:49 +0200 Subject: ide: pass 'hwif *' instead of 'drive *' to ->OUTBSYNC method There should be no functional changes caused by this patch. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index b01b102be4de..1c3431469649 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -493,7 +493,7 @@ typedef struct hwif_s { void (*ide_dma_clear_irq)(ide_drive_t *drive); void (*OUTB)(u8 addr, unsigned long port); - void (*OUTBSYNC)(ide_drive_t *drive, u8 addr, unsigned long port); + void (*OUTBSYNC)(struct hwif_s *hwif, u8 addr, unsigned long port); u8 (*INB)(unsigned long port); -- cgit v1.2.2 From 0fd04dcc2ebb6ec9088c24b368b0ce1f42a98ef5 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:50 +0200 Subject: ide: use ->OUTBSYNC in ide_set_irq() Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 1c3431469649..4d1c9714f1d9 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1340,7 +1340,8 @@ static inline void ide_set_irq(ide_drive_t *drive, int on) { ide_hwif_t *hwif = drive->hwif; - hwif->OUTB(drive->ctl | (on ? 0 : 2), hwif->io_ports.ctl_addr); + hwif->OUTBSYNC(hwif, drive->ctl | (on ? 0 : 2), + hwif->io_ports.ctl_addr); } static inline u8 ide_read_status(ide_drive_t *drive) -- cgit v1.2.2 From ff07488346702f554aaeb6aae982540aa0302373 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:50 +0200 Subject: ide: remove drive->ctl Remove drive->ctl (it is always equal to 0x08 after init time). While at it: * Use ATA_DEVCTL_OBS define. There should be no functional changes caused by this patch. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 4d1c9714f1d9..d8c86f0362c4 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -364,7 +364,6 @@ typedef struct ide_drive_s { u8 wcache; /* status of write cache */ u8 acoustic; /* acoustic management */ u8 media; /* disk, cdrom, tape, floppy, ... */ - u8 ctl; /* "normal" value for Control register */ u8 ready_stat; /* min status value for drive ready */ u8 mult_count; /* current multiple sector setting */ u8 mult_req; /* requested multiple sector setting */ @@ -1340,7 +1339,7 @@ static inline void ide_set_irq(ide_drive_t *drive, int on) { ide_hwif_t *hwif = drive->hwif; - hwif->OUTBSYNC(hwif, drive->ctl | (on ? 0 : 2), + hwif->OUTBSYNC(hwif, ATA_DEVCTL_OBS | (on ? 0 : 2), hwif->io_ports.ctl_addr); } -- cgit v1.2.2 From 63f5abb0959337db0d5bece9cefba03cdcadec51 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 15 Jul 2008 21:21:51 +0200 Subject: ide: remove action argument in ide_do_drive_cmd ide_do_drive_cmd is called only with ide_preempt action argument. So we can remove the action argument in ide_do_drive_cmd and ide_action_t typedef. This patch also includes two minor cleanups: 1) ide_do_drive_cmd always succeeds so we don't need the return value; 2) the callers use blk_rq_init before ide_do_drive_cmd so there is no need to initialize rq->errors. Signed-off-by: FUJITA Tomonori Cc: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index d8c86f0362c4..04267dc1edf2 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -851,17 +851,7 @@ int ide_wait_stat(ide_startstop_t *, ide_drive_t *, u8, u8, unsigned long); extern ide_startstop_t ide_do_reset (ide_drive_t *); -/* - * "action" parameter type for ide_do_drive_cmd() below. - */ -typedef enum { - ide_wait, /* insert rq at end of list, and wait for it */ - ide_preempt, /* insert rq in front of current request */ - ide_head_wait, /* insert rq in front of current request and wait for it */ - ide_end /* insert rq at end of list, but don't wait for it */ -} ide_action_t; - -extern int ide_do_drive_cmd(ide_drive_t *, struct request *, ide_action_t); +extern void ide_do_drive_cmd(ide_drive_t *, struct request *); extern void ide_end_drive_cmd(ide_drive_t *, u8, u8); -- cgit v1.2.2 From 92f5daff2b8439fa4c57c57f47823ffc459c3bd9 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:55 +0200 Subject: ide-tape: make pc->idetape_callback void There should be no functional changes caused by this patch. Cc: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 04267dc1edf2..8936b21a7030 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -641,7 +641,7 @@ struct ide_atapi_pc { */ u8 pc_buf[256]; void (*idefloppy_callback) (ide_drive_t *); - ide_startstop_t (*idetape_callback) (ide_drive_t *); + void (*idetape_callback) (ide_drive_t *); /* idetape only */ struct idetape_bh *bh; -- cgit v1.2.2 From 1b06e92aa03018e4b3ba281e03a7711d9b71a998 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:56 +0200 Subject: ide-{floppy,tape}: merge pc->idefloppy_callback and pc->idetape_callback Merge pc->idefloppy_callback and pc->idetape_callback into pc->callback. There should be no functional changes caused by this patch. Cc: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 8936b21a7030..f079456adfdb 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -640,8 +640,8 @@ struct ide_atapi_pc { * to change/removal later. */ u8 pc_buf[256]; - void (*idefloppy_callback) (ide_drive_t *); - void (*idetape_callback) (ide_drive_t *); + + void (*callback)(ide_drive_t *); /* idetape only */ struct idetape_bh *bh; -- cgit v1.2.2 From 5e3310958204912f3f00be2592c945fbc37db6ae Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:56 +0200 Subject: ide-{floppy,tape}: PC_FLAG_DMA_RECOMMENDED -> PC_FLAG_DMA_OK * Use PC_FLAG_DMA_OK flag instead of PC_FLAG_DMA_RECOMMENDED one. * Remove no longer used PC_FLAG_DMA_RECOMMENDED flag. There should be no functional changes caused by this patch. Cc: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index f079456adfdb..63cee2947f60 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -602,12 +602,11 @@ enum { PC_FLAG_SUPPRESS_ERROR = (1 << 1), PC_FLAG_WAIT_FOR_DSC = (1 << 2), PC_FLAG_DMA_OK = (1 << 3), - PC_FLAG_DMA_RECOMMENDED = (1 << 4), - PC_FLAG_DMA_IN_PROGRESS = (1 << 5), - PC_FLAG_DMA_ERROR = (1 << 6), - PC_FLAG_WRITING = (1 << 7), + PC_FLAG_DMA_IN_PROGRESS = (1 << 4), + PC_FLAG_DMA_ERROR = (1 << 5), + PC_FLAG_WRITING = (1 << 6), /* command timed out */ - PC_FLAG_TIMEDOUT = (1 << 8), + PC_FLAG_TIMEDOUT = (1 << 7), }; struct ide_atapi_pc { -- cgit v1.2.2 From 5d41893c0f9caf94b449eada0279a08c86f0212e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:57 +0200 Subject: ide: add PC_FLAG_ZIP_DRIVE pc flag Add PC_FLAG_ZIP_DRIVE pc flag, set it in idefloppy_do_request() and check for it (instead of checking for IDEFLOPPY_FLAG_ZIP_DRIVE) in idefloppy_transfer_pc(). This is a preparation for adding generic ide_transfer_pc() helper. There should be no functional changes caused by this patch. Cc: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 63cee2947f60..89feaea9e20b 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -607,6 +607,7 @@ enum { PC_FLAG_WRITING = (1 << 6), /* command timed out */ PC_FLAG_TIMEDOUT = (1 << 7), + PC_FLAG_ZIP_DRIVE = (1 << 8), }; struct ide_atapi_pc { -- cgit v1.2.2 From 594c16d8dd54cd7b1c5ef1ec3ac0f6bf34301dad Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:58 +0200 Subject: ide: add ide_transfer_pc() helper * Add ide-atapi.c file for generic ATAPI support together with CONFIG_IDE_ATAPI config option. * Add generic ide_transfer_pc() helper to ide-atapi.c and then convert ide-{floppy,tape,scsi} device drivers to use it. There should be no functional changes caused by this patch. Cc: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 89feaea9e20b..bed3c58798ae 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -967,6 +967,9 @@ extern int drive_is_ready(ide_drive_t *); void ide_pktcmd_tf_load(ide_drive_t *, u32, u16, u8); +ide_startstop_t ide_transfer_pc(ide_drive_t *, struct ide_atapi_pc *, + ide_handler_t *, unsigned int, ide_expiry_t *); + ide_startstop_t do_rw_taskfile(ide_drive_t *, ide_task_t *); void task_end_request(ide_drive_t *, struct request *, u8); -- cgit v1.2.2 From 28c7214bd8c2bbd4873b8f1e7f58d86d3731124f Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:59 +0200 Subject: ide: add PC_FLAG_DRQ_INTERRUPT pc flag Add PC_FLAG_DRQ_INTERRUPT pc flag, set it in ide*_do_request() and check for it (instead of checking for IDE*_FLAG_DRQ_INTERRUPT) in ide*_issue_pc(). This is a preparation for adding generic ide_issue_pc() helper. There should be no functional changes caused by this patch. Cc: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index bed3c58798ae..c2274ad44b2e 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -608,6 +608,7 @@ enum { /* command timed out */ PC_FLAG_TIMEDOUT = (1 << 7), PC_FLAG_ZIP_DRIVE = (1 << 8), + PC_FLAG_DRQ_INTERRUPT = (1 << 9), }; struct ide_atapi_pc { -- cgit v1.2.2 From 6bf1641ca1c7554f0da54aaf89788731b541bacc Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:22:00 +0200 Subject: ide: add ide_issue_pc() helper Add generic ide_issue_pc() helper to ide-atapi.c and then convert ide-{floppy,tape,scsi} device drivers to use it. There should be no functional changes caused by this patch. Cc: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index c2274ad44b2e..fee07a7edb19 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -970,6 +970,8 @@ void ide_pktcmd_tf_load(ide_drive_t *, u32, u16, u8); ide_startstop_t ide_transfer_pc(ide_drive_t *, struct ide_atapi_pc *, ide_handler_t *, unsigned int, ide_expiry_t *); +ide_startstop_t ide_issue_pc(ide_drive_t *, struct ide_atapi_pc *, + ide_handler_t *, unsigned int, ide_expiry_t *); ide_startstop_t do_rw_taskfile(ide_drive_t *, ide_task_t *); -- cgit v1.2.2 From 646c0cb6c430f8d3ad3769dd1518fe664ff0ce27 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:22:03 +0200 Subject: ide: add ide_pc_intr() helper * ide-tape.c: add 'drive' argument to idetape_update_buffers(). * Add generic ide_pc_intr() helper to ide-atapi.c and then convert ide-{floppy,tape,scsi} device drivers to use it. * ide-tape.c: remove no longer needed DBG_PC_INTR. There should be no functional changes caused by this patch (unless the debugging is explicitely compiled in). Cc: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index fee07a7edb19..ac4eeb2932ef 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -968,6 +968,12 @@ extern int drive_is_ready(ide_drive_t *); void ide_pktcmd_tf_load(ide_drive_t *, u32, u16, u8); +ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc, + ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry, + void (*update_buffers)(ide_drive_t *, struct ide_atapi_pc *), + void (*retry_pc)(ide_drive_t *), void (*dsc_handle)(ide_drive_t *), + void (*io_buffers)(ide_drive_t *, struct ide_atapi_pc *, unsigned int, + int)); ide_startstop_t ide_transfer_pc(ide_drive_t *, struct ide_atapi_pc *, ide_handler_t *, unsigned int, ide_expiry_t *); ide_startstop_t ide_issue_pc(ide_drive_t *, struct ide_atapi_pc *, -- cgit v1.2.2