From 979f693def9084a452846365dfde5dcb28366333 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 22 Sep 2009 14:44:11 +0200 Subject: ratelimit: Use per ratelimit context locking I'd like to use printk_ratelimit() in atomic context, but that's not possible right now due to the spinlock usage this commit introduced more than a year ago: 717115e: printk ratelimiting rewrite As a first step push the lock into the ratelimit state structure. This allows us to deal with locking failures to be considered as an event related to that state being too busy. Also clean up the code a bit (without changing functionality): - tidy up the definitions - clean up the code flow This also shrinks the code a tiny bit: text data bss dec hex filename 264 0 4 268 10c ratelimit.o.before 255 0 0 255 ff ratelimit.o.after ( Whole-kernel data size got a bit larger, because we have two ratelimit-state data structures right now. ) Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Cc: David S. Miller LKML-Reference: Signed-off-by: Ingo Molnar --- lib/ratelimit.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/ratelimit.c b/lib/ratelimit.c index 26187edcc7ea..0e2c28e8a0ca 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -7,15 +7,12 @@ * parameter. Now every user can use their own standalone ratelimit_state. * * This file is released under the GPLv2. - * */ #include #include #include -static DEFINE_SPINLOCK(ratelimit_lock); - /* * __ratelimit - rate limiting * @rs: ratelimit_state data @@ -26,11 +23,12 @@ static DEFINE_SPINLOCK(ratelimit_lock); int __ratelimit(struct ratelimit_state *rs) { unsigned long flags; + int ret; if (!rs->interval) return 1; - spin_lock_irqsave(&ratelimit_lock, flags); + spin_lock_irqsave(&rs->lock, flags); if (!rs->begin) rs->begin = jiffies; @@ -38,20 +36,19 @@ int __ratelimit(struct ratelimit_state *rs) if (rs->missed) printk(KERN_WARNING "%s: %d callbacks suppressed\n", __func__, rs->missed); - rs->begin = 0; + rs->begin = 0; rs->printed = 0; - rs->missed = 0; + rs->missed = 0; } - if (rs->burst && rs->burst > rs->printed) - goto print; - - rs->missed++; - spin_unlock_irqrestore(&ratelimit_lock, flags); - return 0; + if (rs->burst && rs->burst > rs->printed) { + rs->printed++; + ret = 1; + } else { + rs->missed++; + ret = 0; + } + spin_unlock_irqrestore(&rs->lock, flags); -print: - rs->printed++; - spin_unlock_irqrestore(&ratelimit_lock, flags); - return 1; + return ret; } EXPORT_SYMBOL(__ratelimit); -- cgit v1.2.2 From edaac8e3167501cda336231d00611bf59c164346 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 22 Sep 2009 14:44:11 +0200 Subject: ratelimit: Fix/allow use in atomic contexts I'd like to use printk_ratelimit() in NMI context, but it's not robust right now due to spinlock usage in lib/ratelimit.c. If an NMI is unlucky enough to hit just that spot we might lock up trying to take the spinlock again. Fix that by using a trylock variant. If we contend on that lock we can genuinely skip the message because the state is just being accessed by another CPU (or by this CPU). ( We could use atomics for the suppressed messages field, but i doubt it matters in practice and it makes the code heavier. ) Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Cc: David S. Miller LKML-Reference: Signed-off-by: Ingo Molnar --- lib/ratelimit.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/ratelimit.c b/lib/ratelimit.c index 0e2c28e8a0ca..69bfcacda16d 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -28,7 +28,15 @@ int __ratelimit(struct ratelimit_state *rs) if (!rs->interval) return 1; - spin_lock_irqsave(&rs->lock, flags); + /* + * If we contend on this state's lock then almost + * by definition we are too busy to print a message, + * in addition to the one that will be printed by + * the entity that is holding the lock already: + */ + if (!spin_trylock_irqsave(&rs->lock, flags)) + return 1; + if (!rs->begin) rs->begin = jiffies; -- cgit v1.2.2 From 3fff4c42bd0a89869a0eb1e7874cc06ffa4aa0f5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 22 Sep 2009 16:18:09 +0200 Subject: printk: Remove ratelimit.h from kernel.h Decouple kernel.h from ratelimit.h: the global declaration of printk's ratelimit_state is not needed, and it leads to messy circular dependencies due to ratelimit.h's (new) adding of a spinlock_types.h include. Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Cc: David S. Miller LKML-Reference: Signed-off-by: Ingo Molnar --- lib/ratelimit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/ratelimit.c b/lib/ratelimit.c index 69bfcacda16d..5551731ae1d4 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -9,7 +9,7 @@ * This file is released under the GPLv2. */ -#include +#include #include #include -- cgit v1.2.2 From 96a2c464de07d7c72988db851c029b204fc59108 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 1 Aug 2009 01:34:24 +0200 Subject: tracing/bkl: Add bkl ftrace events Add two events lock_kernel and unlock_kernel() to trace the bkl uses. This opens the door for userspace tools to perform statistics about the callsites that use it, dependencies with other locks (by pairing the trace with lock events), use with recursivity and so on... The {__reacquire,release}_kernel_lock() events are not traced because these are called from schedule, thus the sched events are sufficient to trace them. Example of a trace: hald-addon-stor-4152 [000] 165.875501: unlock_kernel: depth: 0, fs/block_dev.c:1358 __blkdev_put() hald-addon-stor-4152 [000] 167.832974: lock_kernel: depth: 0, fs/block_dev.c:1167 __blkdev_get() How to get the callsites that acquire it recursively: cd /debug/tracing/events/bkl echo "lock_depth > 0" > filter firefox-4951 [001] 206.276967: unlock_kernel: depth: 1, fs/reiserfs/super.c:575 reiserfs_dirty_inode() You can also filter by file and/or line. v2: Use of FILTER_PTR_STRING attribute for files and lines fields to make them traceable. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Li Zefan --- lib/kernel_lock.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c index 39f1029e3525..5c10b2e1fd08 100644 --- a/lib/kernel_lock.c +++ b/lib/kernel_lock.c @@ -5,10 +5,11 @@ * relegated to obsolescence, but used by various less * important (or lazy) subsystems. */ -#include #include #include #include +#define CREATE_TRACE_POINTS +#include /* * The 'big kernel lock' @@ -113,7 +114,7 @@ static inline void __unlock_kernel(void) * This cannot happen asynchronously, so we only need to * worry about other CPU's. */ -void __lockfunc lock_kernel(void) +void __lockfunc _lock_kernel(void) { int depth = current->lock_depth+1; if (likely(!depth)) @@ -121,13 +122,13 @@ void __lockfunc lock_kernel(void) current->lock_depth = depth; } -void __lockfunc unlock_kernel(void) +void __lockfunc _unlock_kernel(void) { BUG_ON(current->lock_depth < 0); if (likely(--current->lock_depth < 0)) __unlock_kernel(); } -EXPORT_SYMBOL(lock_kernel); -EXPORT_SYMBOL(unlock_kernel); +EXPORT_SYMBOL(_lock_kernel); +EXPORT_SYMBOL(_unlock_kernel); -- cgit v1.2.2 From 925936ebf35a95c290e010b784c962164e6728f3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 28 Sep 2009 17:12:49 +0200 Subject: tracing: Pushdown the bkl tracepoints calls Currently we are calling the bkl tracepoint callbacks just before the bkl lock/unlock operations, ie the tracepoint call is not inside a lock_kernel() function but inside a lock_kernel() macro. Hence the bkl trace event header must be included from smp_lock.h. This raises some nasty circular header dependencies: linux/smp_lock.h -> trace/events/bkl.h -> trace/define_trace.h -> trace/ftrace.h -> linux/ftrace_event.h -> linux/hardirq.h -> linux/smp_lock.h This results in incomplete event declarations, spurious event definitions and other kind of funny behaviours. This is hardly fixable without ugly workarounds. So instead, we push the file name, line number and function name as lock_kernel() parameters, so that we only deal with the trace event header from lib/kernel_lock.c This adds two parameters to lock_kernel() and unlock_kernel() but it should be fine wrt to performances because this pair dos not seem to be called in fast paths. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Ingo Molnar Cc: Li Zefan --- lib/kernel_lock.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c index 5c10b2e1fd08..4ebfa5a164d7 100644 --- a/lib/kernel_lock.c +++ b/lib/kernel_lock.c @@ -8,9 +8,11 @@ #include #include #include -#define CREATE_TRACE_POINTS #include +#define CREATE_TRACE_POINTS +#include + /* * The 'big kernel lock' * @@ -114,19 +116,24 @@ static inline void __unlock_kernel(void) * This cannot happen asynchronously, so we only need to * worry about other CPU's. */ -void __lockfunc _lock_kernel(void) +void __lockfunc _lock_kernel(const char *func, const char *file, int line) { - int depth = current->lock_depth+1; + int depth = current->lock_depth + 1; + + trace_lock_kernel(func, file, line); + if (likely(!depth)) __lock_kernel(); current->lock_depth = depth; } -void __lockfunc _unlock_kernel(void) +void __lockfunc _unlock_kernel(const char *func, const char *file, int line) { BUG_ON(current->lock_depth < 0); if (likely(--current->lock_depth < 0)) __unlock_kernel(); + + trace_unlock_kernel(func, file, line); } EXPORT_SYMBOL(_lock_kernel); -- cgit v1.2.2 From b411b3637fa71fce9cf2acf0639009500f5892fe Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 25 Sep 2009 16:07:19 -0700 Subject: The DRBD driver Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- lib/Kconfig | 3 + lib/Makefile | 2 + lib/lru_cache.c | 560 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 565 insertions(+) create mode 100644 lib/lru_cache.c (limited to 'lib') diff --git a/lib/Kconfig b/lib/Kconfig index bb1326d3839c..1cfe51628e1b 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -200,4 +200,7 @@ config NLATTR config GENERIC_ATOMIC64 bool +config LRU_CACHE + tristate + endmenu diff --git a/lib/Makefile b/lib/Makefile index 2e78277eff9d..347ad8db29d3 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -91,6 +91,8 @@ obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o obj-$(CONFIG_NLATTR) += nlattr.o +obj-$(CONFIG_LRU_CACHE) += lru_cache.o + obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o obj-$(CONFIG_GENERIC_CSUM) += checksum.o diff --git a/lib/lru_cache.c b/lib/lru_cache.c new file mode 100644 index 000000000000..270de9d31b8c --- /dev/null +++ b/lib/lru_cache.c @@ -0,0 +1,560 @@ +/* + lru_cache.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. + Copyright (C) 2003-2008, Philipp Reisner . + Copyright (C) 2003-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + + */ + +#include +#include +#include +#include /* for memset */ +#include /* for seq_printf */ +#include + +MODULE_AUTHOR("Philipp Reisner , " + "Lars Ellenberg "); +MODULE_DESCRIPTION("lru_cache - Track sets of hot objects"); +MODULE_LICENSE("GPL"); + +/* this is developers aid only. + * it catches concurrent access (lack of locking on the users part) */ +#define PARANOIA_ENTRY() do { \ + BUG_ON(!lc); \ + BUG_ON(!lc->nr_elements); \ + BUG_ON(test_and_set_bit(__LC_PARANOIA, &lc->flags)); \ +} while (0) + +#define RETURN(x...) do { \ + clear_bit(__LC_PARANOIA, &lc->flags); \ + smp_mb__after_clear_bit(); return x ; } while (0) + +/* BUG() if e is not one of the elements tracked by lc */ +#define PARANOIA_LC_ELEMENT(lc, e) do { \ + struct lru_cache *lc_ = (lc); \ + struct lc_element *e_ = (e); \ + unsigned i = e_->lc_index; \ + BUG_ON(i >= lc_->nr_elements); \ + BUG_ON(lc_->lc_element[i] != e_); } while (0) + +/** + * lc_create - prepares to track objects in an active set + * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details + * @e_count: number of elements allowed to be active simultaneously + * @e_size: size of the tracked objects + * @e_off: offset to the &struct lc_element member in a tracked object + * + * Returns a pointer to a newly initialized struct lru_cache on success, + * or NULL on (allocation) failure. + */ +struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, + unsigned e_count, size_t e_size, size_t e_off) +{ + struct hlist_head *slot = NULL; + struct lc_element **element = NULL; + struct lru_cache *lc; + struct lc_element *e; + unsigned cache_obj_size = kmem_cache_size(cache); + unsigned i; + + WARN_ON(cache_obj_size < e_size); + if (cache_obj_size < e_size) + return NULL; + + /* e_count too big; would probably fail the allocation below anyways. + * for typical use cases, e_count should be few thousand at most. */ + if (e_count > LC_MAX_ACTIVE) + return NULL; + + slot = kzalloc(e_count * sizeof(struct hlist_head*), GFP_KERNEL); + if (!slot) + goto out_fail; + element = kzalloc(e_count * sizeof(struct lc_element *), GFP_KERNEL); + if (!element) + goto out_fail; + + lc = kzalloc(sizeof(*lc), GFP_KERNEL); + if (!lc) + goto out_fail; + + INIT_LIST_HEAD(&lc->in_use); + INIT_LIST_HEAD(&lc->lru); + INIT_LIST_HEAD(&lc->free); + + lc->name = name; + lc->element_size = e_size; + lc->element_off = e_off; + lc->nr_elements = e_count; + lc->new_number = LC_FREE; + lc->lc_cache = cache; + lc->lc_element = element; + lc->lc_slot = slot; + + /* preallocate all objects */ + for (i = 0; i < e_count; i++) { + void *p = kmem_cache_alloc(cache, GFP_KERNEL); + if (!p) + break; + memset(p, 0, lc->element_size); + e = p + e_off; + e->lc_index = i; + e->lc_number = LC_FREE; + list_add(&e->list, &lc->free); + element[i] = e; + } + if (i == e_count) + return lc; + + /* else: could not allocate all elements, give up */ + for (i--; i; i--) { + void *p = element[i]; + kmem_cache_free(cache, p - e_off); + } + kfree(lc); +out_fail: + kfree(element); + kfree(slot); + return NULL; +} + +void lc_free_by_index(struct lru_cache *lc, unsigned i) +{ + void *p = lc->lc_element[i]; + WARN_ON(!p); + if (p) { + p -= lc->element_off; + kmem_cache_free(lc->lc_cache, p); + } +} + +/** + * lc_destroy - frees memory allocated by lc_create() + * @lc: the lru cache to destroy + */ +void lc_destroy(struct lru_cache *lc) +{ + unsigned i; + if (!lc) + return; + for (i = 0; i < lc->nr_elements; i++) + lc_free_by_index(lc, i); + kfree(lc->lc_element); + kfree(lc->lc_slot); + kfree(lc); +} + +/** + * lc_reset - does a full reset for @lc and the hash table slots. + * @lc: the lru cache to operate on + * + * It is roughly the equivalent of re-allocating a fresh lru_cache object, + * basically a short cut to lc_destroy(lc); lc = lc_create(...); + */ +void lc_reset(struct lru_cache *lc) +{ + unsigned i; + + INIT_LIST_HEAD(&lc->in_use); + INIT_LIST_HEAD(&lc->lru); + INIT_LIST_HEAD(&lc->free); + lc->used = 0; + lc->hits = 0; + lc->misses = 0; + lc->starving = 0; + lc->dirty = 0; + lc->changed = 0; + lc->flags = 0; + lc->changing_element = NULL; + lc->new_number = LC_FREE; + memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements); + + for (i = 0; i < lc->nr_elements; i++) { + struct lc_element *e = lc->lc_element[i]; + void *p = e; + p -= lc->element_off; + memset(p, 0, lc->element_size); + /* re-init it */ + e->lc_index = i; + e->lc_number = LC_FREE; + list_add(&e->list, &lc->free); + } +} + +/** + * lc_seq_printf_stats - print stats about @lc into @seq + * @seq: the seq_file to print into + * @lc: the lru cache to print statistics of + */ +size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) +{ + /* NOTE: + * total calls to lc_get are + * (starving + hits + misses) + * misses include "dirty" count (update from an other thread in + * progress) and "changed", when this in fact lead to an successful + * update of the cache. + */ + return seq_printf(seq, "\t%s: used:%u/%u " + "hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n", + lc->name, lc->used, lc->nr_elements, + lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed); +} + +static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) +{ + return lc->lc_slot + (enr % lc->nr_elements); +} + + +/** + * lc_find - find element by label, if present in the hash table + * @lc: The lru_cache object + * @enr: element number + * + * Returns the pointer to an element, if the element with the requested + * "label" or element number is present in the hash table, + * or NULL if not found. Does not change the refcnt. + */ +struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) +{ + struct hlist_node *n; + struct lc_element *e; + + BUG_ON(!lc); + BUG_ON(!lc->nr_elements); + hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) { + if (e->lc_number == enr) + return e; + } + return NULL; +} + +/* returned element will be "recycled" immediately */ +static struct lc_element *lc_evict(struct lru_cache *lc) +{ + struct list_head *n; + struct lc_element *e; + + if (list_empty(&lc->lru)) + return NULL; + + n = lc->lru.prev; + e = list_entry(n, struct lc_element, list); + + PARANOIA_LC_ELEMENT(lc, e); + + list_del(&e->list); + hlist_del(&e->colision); + return e; +} + +/** + * lc_del - removes an element from the cache + * @lc: The lru_cache object + * @e: The element to remove + * + * @e must be unused (refcnt == 0). Moves @e from "lru" to "free" list, + * sets @e->enr to %LC_FREE. + */ +void lc_del(struct lru_cache *lc, struct lc_element *e) +{ + PARANOIA_ENTRY(); + PARANOIA_LC_ELEMENT(lc, e); + BUG_ON(e->refcnt); + + e->lc_number = LC_FREE; + hlist_del_init(&e->colision); + list_move(&e->list, &lc->free); + RETURN(); +} + +static struct lc_element *lc_get_unused_element(struct lru_cache *lc) +{ + struct list_head *n; + + if (list_empty(&lc->free)) + return lc_evict(lc); + + n = lc->free.next; + list_del(n); + return list_entry(n, struct lc_element, list); +} + +static int lc_unused_element_available(struct lru_cache *lc) +{ + if (!list_empty(&lc->free)) + return 1; /* something on the free list */ + if (!list_empty(&lc->lru)) + return 1; /* something to evict */ + + return 0; +} + + +/** + * lc_get - get element by label, maybe change the active set + * @lc: the lru cache to operate on + * @enr: the label to look up + * + * Finds an element in the cache, increases its usage count, + * "touches" and returns it. + * + * In case the requested number is not present, it needs to be added to the + * cache. Therefore it is possible that an other element becomes evicted from + * the cache. In either case, the user is notified so he is able to e.g. keep + * a persistent log of the cache changes, and therefore the objects in use. + * + * Return values: + * NULL + * The cache was marked %LC_STARVING, + * or the requested label was not in the active set + * and a changing transaction is still pending (@lc was marked %LC_DIRTY). + * Or no unused or free element could be recycled (@lc will be marked as + * %LC_STARVING, blocking further lc_get() operations). + * + * pointer to the element with the REQUESTED element number. + * In this case, it can be used right away + * + * pointer to an UNUSED element with some different element number, + * where that different number may also be %LC_FREE. + * + * In this case, the cache is marked %LC_DIRTY (blocking further changes), + * and the returned element pointer is removed from the lru list and + * hash collision chains. The user now should do whatever housekeeping + * is necessary. + * Then he must call lc_changed(lc,element_pointer), to finish + * the change. + * + * NOTE: The user needs to check the lc_number on EACH use, so he recognizes + * any cache set change. + */ +struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) +{ + struct lc_element *e; + + PARANOIA_ENTRY(); + if (lc->flags & LC_STARVING) { + ++lc->starving; + RETURN(NULL); + } + + e = lc_find(lc, enr); + if (e) { + ++lc->hits; + if (e->refcnt++ == 0) + lc->used++; + list_move(&e->list, &lc->in_use); /* Not evictable... */ + RETURN(e); + } + + ++lc->misses; + + /* In case there is nothing available and we can not kick out + * the LRU element, we have to wait ... + */ + if (!lc_unused_element_available(lc)) { + __set_bit(__LC_STARVING, &lc->flags); + RETURN(NULL); + } + + /* it was not present in the active set. + * we are going to recycle an unused (or even "free") element. + * user may need to commit a transaction to record that change. + * we serialize on flags & TF_DIRTY */ + if (test_and_set_bit(__LC_DIRTY, &lc->flags)) { + ++lc->dirty; + RETURN(NULL); + } + + e = lc_get_unused_element(lc); + BUG_ON(!e); + + clear_bit(__LC_STARVING, &lc->flags); + BUG_ON(++e->refcnt != 1); + lc->used++; + + lc->changing_element = e; + lc->new_number = enr; + + RETURN(e); +} + +/* similar to lc_get, + * but only gets a new reference on an existing element. + * you either get the requested element, or NULL. + * will be consolidated into one function. + */ +struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr) +{ + struct lc_element *e; + + PARANOIA_ENTRY(); + if (lc->flags & LC_STARVING) { + ++lc->starving; + RETURN(NULL); + } + + e = lc_find(lc, enr); + if (e) { + ++lc->hits; + if (e->refcnt++ == 0) + lc->used++; + list_move(&e->list, &lc->in_use); /* Not evictable... */ + } + RETURN(e); +} + +/** + * lc_changed - tell @lc that the change has been recorded + * @lc: the lru cache to operate on + * @e: the element pending label change + */ +void lc_changed(struct lru_cache *lc, struct lc_element *e) +{ + PARANOIA_ENTRY(); + BUG_ON(e != lc->changing_element); + PARANOIA_LC_ELEMENT(lc, e); + ++lc->changed; + e->lc_number = lc->new_number; + list_add(&e->list, &lc->in_use); + hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number)); + lc->changing_element = NULL; + lc->new_number = LC_FREE; + clear_bit(__LC_DIRTY, &lc->flags); + smp_mb__after_clear_bit(); + RETURN(); +} + + +/** + * lc_put - give up refcnt of @e + * @lc: the lru cache to operate on + * @e: the element to put + * + * If refcnt reaches zero, the element is moved to the lru list, + * and a %LC_STARVING (if set) is cleared. + * Returns the new (post-decrement) refcnt. + */ +unsigned int lc_put(struct lru_cache *lc, struct lc_element *e) +{ + PARANOIA_ENTRY(); + PARANOIA_LC_ELEMENT(lc, e); + BUG_ON(e->refcnt == 0); + BUG_ON(e == lc->changing_element); + if (--e->refcnt == 0) { + /* move it to the front of LRU. */ + list_move(&e->list, &lc->lru); + lc->used--; + clear_bit(__LC_STARVING, &lc->flags); + smp_mb__after_clear_bit(); + } + RETURN(e->refcnt); +} + +/** + * lc_element_by_index + * @lc: the lru cache to operate on + * @i: the index of the element to return + */ +struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i) +{ + BUG_ON(i >= lc->nr_elements); + BUG_ON(lc->lc_element[i] == NULL); + BUG_ON(lc->lc_element[i]->lc_index != i); + return lc->lc_element[i]; +} + +/** + * lc_index_of + * @lc: the lru cache to operate on + * @e: the element to query for its index position in lc->element + */ +unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e) +{ + PARANOIA_LC_ELEMENT(lc, e); + return e->lc_index; +} + +/** + * lc_set - associate index with label + * @lc: the lru cache to operate on + * @enr: the label to set + * @index: the element index to associate label with. + * + * Used to initialize the active set to some previously recorded state. + */ +void lc_set(struct lru_cache *lc, unsigned int enr, int index) +{ + struct lc_element *e; + + if (index < 0 || index >= lc->nr_elements) + return; + + e = lc_element_by_index(lc, index); + e->lc_number = enr; + + hlist_del_init(&e->colision); + hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); + list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru); +} + +/** + * lc_dump - Dump a complete LRU cache to seq in textual form. + * @lc: the lru cache to operate on + * @seq: the &struct seq_file pointer to seq_printf into + * @utext: user supplied "heading" or other info + * @detail: function pointer the user may provide to dump further details + * of the object the lc_element is embedded in. + */ +void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext, + void (*detail) (struct seq_file *, struct lc_element *)) +{ + unsigned int nr_elements = lc->nr_elements; + struct lc_element *e; + int i; + + seq_printf(seq, "\tnn: lc_number refcnt %s\n ", utext); + for (i = 0; i < nr_elements; i++) { + e = lc_element_by_index(lc, i); + if (e->lc_number == LC_FREE) { + seq_printf(seq, "\t%2d: FREE\n", i); + } else { + seq_printf(seq, "\t%2d: %4u %4u ", i, + e->lc_number, e->refcnt); + detail(seq, e); + } + } +} + +EXPORT_SYMBOL(lc_create); +EXPORT_SYMBOL(lc_reset); +EXPORT_SYMBOL(lc_destroy); +EXPORT_SYMBOL(lc_set); +EXPORT_SYMBOL(lc_del); +EXPORT_SYMBOL(lc_try_get); +EXPORT_SYMBOL(lc_find); +EXPORT_SYMBOL(lc_get); +EXPORT_SYMBOL(lc_put); +EXPORT_SYMBOL(lc_changed); +EXPORT_SYMBOL(lc_element_by_index); +EXPORT_SYMBOL(lc_index_of); +EXPORT_SYMBOL(lc_seq_printf_stats); +EXPORT_SYMBOL(lc_seq_dump_details); -- cgit v1.2.2 From 5c828713358cb9df8aa174371edcbbb62203a490 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 23 Oct 2009 14:58:11 +0200 Subject: ratelimit: Make suppressed output messages more useful Today I got: [39648.224782] Registered led device: iwl-phy0::TX [40676.545099] __ratelimit: 246 callbacks suppressed [40676.545103] abcdef[23675]: segfault at 0 ... as you can see the ratelimit message contains a function prefix. Since this is always __ratelimit, this wont help much. This patch changes __ratelimit and printk_ratelimit to print the function name that calls ratelimit. This will pinpoint the responsible function, as long as not several different places call ratelimit with the same ratelimit state at the same time. In that case we catch only one random function that calls ratelimit after the wait period. Signed-off-by: Christian Borntraeger Cc: Dave Young Cc: Linus Torvalds CC: Andrew Morton LKML-Reference: <200910231458.11832.borntraeger@de.ibm.com> Signed-off-by: Ingo Molnar --- lib/ratelimit.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/ratelimit.c b/lib/ratelimit.c index 5551731ae1d4..09f5ce1810dc 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -20,7 +20,7 @@ * This enforces a rate limit: not more than @rs->ratelimit_burst callbacks * in every @rs->ratelimit_jiffies */ -int __ratelimit(struct ratelimit_state *rs) +int ___ratelimit(struct ratelimit_state *rs, const char *func) { unsigned long flags; int ret; @@ -43,7 +43,7 @@ int __ratelimit(struct ratelimit_state *rs) if (time_is_before_jiffies(rs->begin + rs->interval)) { if (rs->missed) printk(KERN_WARNING "%s: %d callbacks suppressed\n", - __func__, rs->missed); + func, rs->missed); rs->begin = 0; rs->printed = 0; rs->missed = 0; @@ -59,4 +59,4 @@ int __ratelimit(struct ratelimit_state *rs) return ret; } -EXPORT_SYMBOL(__ratelimit); +EXPORT_SYMBOL(___ratelimit); -- cgit v1.2.2 From c44ba9f6684946b156335da6a6d55f0b8cf7cb72 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 23 Jun 2009 21:22:58 +0200 Subject: lib/checksum.c: use 32-bit arithmetic consistently The use of 'unsigned long' variables in the 32-bit part of do_csum() is confusing at best, and potentially broken for long input on 64-bit machines. This changes the code to use 'unsigned int' instead, which makes the code behave in the same (correct) way on both 32 and 64 bit machines. Reported-by: Linus Torvalds Signed-off-by: Arnd Bergmann --- lib/checksum.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/checksum.c b/lib/checksum.c index b2e2fd468461..886b48db4f28 100644 --- a/lib/checksum.c +++ b/lib/checksum.c @@ -37,7 +37,7 @@ #include -static inline unsigned short from32to16(unsigned long x) +static inline unsigned short from32to16(unsigned int x) { /* add up 16-bit and 16-bit for 16+c bit */ x = (x & 0xffff) + (x >> 16); @@ -49,7 +49,7 @@ static inline unsigned short from32to16(unsigned long x) static unsigned int do_csum(const unsigned char *buff, int len) { int odd, count; - unsigned long result = 0; + unsigned int result = 0; if (len <= 0) goto out; @@ -73,9 +73,9 @@ static unsigned int do_csum(const unsigned char *buff, int len) } count >>= 1; /* nr of 32-bit words.. */ if (count) { - unsigned long carry = 0; + unsigned int carry = 0; do { - unsigned long w = *(unsigned int *) buff; + unsigned int w = *(unsigned int *) buff; count--; buff += 4; result += carry; -- cgit v1.2.2 From 20c1f641bb80fb272dec959a5caabed92e5a422e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 23 Jun 2009 21:37:26 +0200 Subject: lib/checksum.c: make do_csum optional Mike Frysinger suggested that do_csum should be optional so that an architecture can use the generic checksum code but still provide an optimized fast-path for the most critical function. This can mean an implementation using inline assembly, or in case of Alpha one using 64-bit arithmetic in C. Cc: Mike Frysinger Signed-off-by: Arnd Bergmann --- lib/checksum.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/checksum.c b/lib/checksum.c index 886b48db4f28..b08c2d059024 100644 --- a/lib/checksum.c +++ b/lib/checksum.c @@ -37,6 +37,7 @@ #include +#ifndef do_csum static inline unsigned short from32to16(unsigned int x) { /* add up 16-bit and 16-bit for 16+c bit */ @@ -102,6 +103,7 @@ static unsigned int do_csum(const unsigned char *buff, int len) out: return result; } +#endif /* * This is a version of ip_compute_csum() optimized for IP headers, -- cgit v1.2.2 From 0a5549ed163520787f76b7515dfe9d9aa1c7ae37 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 23 Jun 2009 22:52:51 +0200 Subject: lib/checksum: fix one more thinko When do_csum gets unaligned data, we really need to treat the first byte as an even byte, not an odd byte, because we swap the two halves later. Found by Mike's checksum-selftest module. Reported-by: Mike Frysinger Signed-off-by: Arnd Bergmann --- lib/checksum.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/checksum.c b/lib/checksum.c index b08c2d059024..097508732f34 100644 --- a/lib/checksum.c +++ b/lib/checksum.c @@ -57,9 +57,9 @@ static unsigned int do_csum(const unsigned char *buff, int len) odd = 1 & (unsigned long) buff; if (odd) { #ifdef __LITTLE_ENDIAN - result = *buff; -#else result += (*buff << 8); +#else + result = *buff; #endif len--; buff++; -- cgit v1.2.2 From 2840537228fba95e05cab1a6b5719c61982db279 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 6 Oct 2009 15:33:29 -0600 Subject: vsprintf: fix io/mem resource width The leading "0x" consumes field width, so leave space for it in addition to the 4 or 8 hex digits. This means we'll print "0x0000-0x01df" rather than "0x00-0x1df", for example. Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- lib/vsprintf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 33bed5e67a21..7830576018c0 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -598,11 +598,11 @@ static char *resource_string(char *buf, char *end, struct resource *res, struct printf_spec spec) { #ifndef IO_RSRC_PRINTK_SIZE -#define IO_RSRC_PRINTK_SIZE 4 +#define IO_RSRC_PRINTK_SIZE 6 #endif #ifndef MEM_RSRC_PRINTK_SIZE -#define MEM_RSRC_PRINTK_SIZE 8 +#define MEM_RSRC_PRINTK_SIZE 10 #endif struct printf_spec num_spec = { .base = 16, -- cgit v1.2.2 From c91d3376e5f4277173a22f0ef9989125c318bacb Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 6 Oct 2009 15:33:34 -0600 Subject: vsprintf: add %pR support for IRQ and DMA resources Print addresses (IO port numbers and memory addresses) in hex, but print others (IRQs and DMA channels) in decimal. Only print the end if it's different from the start. Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- lib/vsprintf.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 7830576018c0..1b60aedab44d 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -604,26 +604,37 @@ static char *resource_string(char *buf, char *end, struct resource *res, #ifndef MEM_RSRC_PRINTK_SIZE #define MEM_RSRC_PRINTK_SIZE 10 #endif - struct printf_spec num_spec = { + struct printf_spec hex_spec = { .base = 16, .precision = -1, .flags = SPECIAL | SMALL | ZEROPAD, }; - /* room for the actual numbers, the two "0x", -, [, ] and the final zero */ - char sym[4*sizeof(resource_size_t) + 8]; + struct printf_spec dec_spec = { + .base = 10, + .precision = -1, + .flags = 0, + }; + /* room for two actual numbers (decimal or hex), the two "0x", -, [, ] + * and the final zero */ + char sym[2*3*sizeof(resource_size_t) + 8]; char *p = sym, *pend = sym + sizeof(sym); - int size = -1; + int size = -1, addr = 0; - if (res->flags & IORESOURCE_IO) + if (res->flags & IORESOURCE_IO) { size = IO_RSRC_PRINTK_SIZE; - else if (res->flags & IORESOURCE_MEM) + addr = 1; + } else if (res->flags & IORESOURCE_MEM) { size = MEM_RSRC_PRINTK_SIZE; + addr = 1; + } *p++ = '['; - num_spec.field_width = size; - p = number(p, pend, res->start, num_spec); - *p++ = '-'; - p = number(p, pend, res->end, num_spec); + hex_spec.field_width = size; + p = number(p, pend, res->start, addr ? hex_spec : dec_spec); + if (res->start != res->end) { + *p++ = '-'; + p = number(p, pend, res->end, addr ? hex_spec : dec_spec); + } *p++ = ']'; *p = 0; -- cgit v1.2.2 From fd95541e23e2c9acb1e38cd41fc0c7cc37fceb53 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 6 Oct 2009 15:33:39 -0600 Subject: vsprintf: add %pRt, %pRf to print struct resource details This adds support for printing struct resource type and flag information. For example, "%pRt" looks like "[mem 0x80080000000-0x8008001ffff 64bit pref]", and "%pRf" looks like "[mem 0xff5e2000-0xff5e2007 pref flags 0x1]". Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- lib/vsprintf.c | 51 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 1b60aedab44d..a6e195163eb3 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -595,7 +595,7 @@ static char *symbol_string(char *buf, char *end, void *ptr, } static char *resource_string(char *buf, char *end, struct resource *res, - struct printf_spec spec) + struct printf_spec spec, const char *fmt) { #ifndef IO_RSRC_PRINTK_SIZE #define IO_RSRC_PRINTK_SIZE 6 @@ -614,9 +614,21 @@ static char *resource_string(char *buf, char *end, struct resource *res, .precision = -1, .flags = 0, }; - /* room for two actual numbers (decimal or hex), the two "0x", -, [, ] - * and the final zero */ - char sym[2*3*sizeof(resource_size_t) + 8]; + struct printf_spec str_spec = { + .field_width = -1, + .precision = 10, + .flags = LEFT, + }; + struct printf_spec flag_spec = { + .base = 16, + .precision = -1, + .flags = SPECIAL | SMALL, + }; + /* + * room for three actual numbers (decimal or hex), plus + * "[mem 0x-0x 64bit pref disabled flags 0x]\0" + */ + char sym[3*3*sizeof(resource_size_t) + 41]; char *p = sym, *pend = sym + sizeof(sym); int size = -1, addr = 0; @@ -629,12 +641,35 @@ static char *resource_string(char *buf, char *end, struct resource *res, } *p++ = '['; + if (fmt[1] == 't' || fmt[1] == 'f') { + if (res->flags & IORESOURCE_IO) + p = string(p, pend, "io ", str_spec); + else if (res->flags & IORESOURCE_MEM) + p = string(p, pend, "mem ", str_spec); + else if (res->flags & IORESOURCE_IRQ) + p = string(p, pend, "irq ", str_spec); + else if (res->flags & IORESOURCE_DMA) + p = string(p, pend, "dma ", str_spec); + } hex_spec.field_width = size; p = number(p, pend, res->start, addr ? hex_spec : dec_spec); if (res->start != res->end) { *p++ = '-'; p = number(p, pend, res->end, addr ? hex_spec : dec_spec); } + if (fmt[1] == 't' || fmt[1] == 'f') { + if (res->flags & IORESOURCE_MEM_64) + p = string(p, pend, " 64bit", str_spec); + if (res->flags & IORESOURCE_PREFETCH) + p = string(p, pend, " pref", str_spec); + if (res->flags & IORESOURCE_DISABLED) + p = string(p, pend, " disabled", str_spec); + if (fmt[1] == 'f') { + p = string(p, pend, " flags ", str_spec); + p = number(p, pend, res->flags & ~IORESOURCE_TYPE_BITS, + flag_spec); + } + } *p++ = ']'; *p = 0; @@ -812,8 +847,10 @@ static char *ip4_addr_string(char *buf, char *end, const u8 *addr, * - 'f' For simple symbolic function names without offset * - 'S' For symbolic direct pointers with offset * - 's' For symbolic direct pointers without offset - * - 'R' For a struct resource pointer, it prints the range of - * addresses (not the name nor the flags) + * - 'R' For a struct resource pointer, print: + * R address range only ([0x0-0x1f]) + * Rt type and range ([mem 0x0-0x1f 64bit pref]) + * Rf type, range, and flags ([mem 0x0-0x1f 64bit pref flags 0x1]) * - 'M' For a 6-byte MAC address, it prints the address in the * usual colon-separated hex notation * - 'm' For a 6-byte MAC address, it prints the hex address without colons @@ -844,7 +881,7 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, case 'S': return symbol_string(buf, end, ptr, spec, *fmt); case 'R': - return resource_string(buf, end, ptr, spec); + return resource_string(buf, end, ptr, spec, fmt); case 'M': /* Colon separated: 00:01:02:03:04:05 */ case 'm': /* Contiguous: 000102030405 */ return mac_address_string(buf, end, ptr, spec, fmt); -- cgit v1.2.2 From c7dabef8a2c59e6a3de9d66fc35fb6a43ef7172d Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 27 Oct 2009 13:26:47 -0600 Subject: vsprintf: use %pR, %pr instead of %pRt, %pRf Jesse accidentally applied v1 [1] of the patchset instead of v2 [2]. This is the diff between v1 and v2. The changes in this patch are: - tidied vsprintf stack buffer to shrink and compute size more accurately - use %pR for decoding and %pr for "raw" (with type and flags) instead of adding %pRt and %pRf [1] http://lkml.org/lkml/2009/10/6/491 [2] http://lkml.org/lkml/2009/10/13/441 Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- lib/vsprintf.c | 55 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 25 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index a6e195163eb3..6438cd5599ee 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -624,13 +624,19 @@ static char *resource_string(char *buf, char *end, struct resource *res, .precision = -1, .flags = SPECIAL | SMALL, }; - /* - * room for three actual numbers (decimal or hex), plus - * "[mem 0x-0x 64bit pref disabled flags 0x]\0" - */ - char sym[3*3*sizeof(resource_size_t) + 41]; + + /* 32-bit res (sizeof==4): 10 chars in dec, 10 in hex ("0x" + 8) + * 64-bit res (sizeof==8): 20 chars in dec, 18 in hex ("0x" + 16) */ +#define RSRC_BUF_SIZE ((2 * sizeof(resource_size_t)) + 4) +#define FLAG_BUF_SIZE (2 * sizeof(res->flags)) +#define DECODED_BUF_SIZE sizeof("[mem - 64bit pref disabled]") +#define RAW_BUF_SIZE sizeof("[mem - flags 0x]") + char sym[max(2*RSRC_BUF_SIZE + DECODED_BUF_SIZE, + 2*RSRC_BUF_SIZE + FLAG_BUF_SIZE + RAW_BUF_SIZE)]; + char *p = sym, *pend = sym + sizeof(sym); int size = -1, addr = 0; + int decode = (fmt[0] == 'R') ? 1 : 0; if (res->flags & IORESOURCE_IO) { size = IO_RSRC_PRINTK_SIZE; @@ -641,15 +647,17 @@ static char *resource_string(char *buf, char *end, struct resource *res, } *p++ = '['; - if (fmt[1] == 't' || fmt[1] == 'f') { - if (res->flags & IORESOURCE_IO) - p = string(p, pend, "io ", str_spec); - else if (res->flags & IORESOURCE_MEM) - p = string(p, pend, "mem ", str_spec); - else if (res->flags & IORESOURCE_IRQ) - p = string(p, pend, "irq ", str_spec); - else if (res->flags & IORESOURCE_DMA) - p = string(p, pend, "dma ", str_spec); + if (res->flags & IORESOURCE_IO) + p = string(p, pend, "io ", str_spec); + else if (res->flags & IORESOURCE_MEM) + p = string(p, pend, "mem ", str_spec); + else if (res->flags & IORESOURCE_IRQ) + p = string(p, pend, "irq ", str_spec); + else if (res->flags & IORESOURCE_DMA) + p = string(p, pend, "dma ", str_spec); + else { + p = string(p, pend, "??? ", str_spec); + decode = 0; } hex_spec.field_width = size; p = number(p, pend, res->start, addr ? hex_spec : dec_spec); @@ -657,21 +665,19 @@ static char *resource_string(char *buf, char *end, struct resource *res, *p++ = '-'; p = number(p, pend, res->end, addr ? hex_spec : dec_spec); } - if (fmt[1] == 't' || fmt[1] == 'f') { + if (decode) { if (res->flags & IORESOURCE_MEM_64) p = string(p, pend, " 64bit", str_spec); if (res->flags & IORESOURCE_PREFETCH) p = string(p, pend, " pref", str_spec); if (res->flags & IORESOURCE_DISABLED) p = string(p, pend, " disabled", str_spec); - if (fmt[1] == 'f') { - p = string(p, pend, " flags ", str_spec); - p = number(p, pend, res->flags & ~IORESOURCE_TYPE_BITS, - flag_spec); - } + } else { + p = string(p, pend, " flags ", str_spec); + p = number(p, pend, res->flags, flag_spec); } *p++ = ']'; - *p = 0; + *p = '\0'; return string(buf, end, sym, spec); } @@ -847,10 +853,8 @@ static char *ip4_addr_string(char *buf, char *end, const u8 *addr, * - 'f' For simple symbolic function names without offset * - 'S' For symbolic direct pointers with offset * - 's' For symbolic direct pointers without offset - * - 'R' For a struct resource pointer, print: - * R address range only ([0x0-0x1f]) - * Rt type and range ([mem 0x0-0x1f 64bit pref]) - * Rf type, range, and flags ([mem 0x0-0x1f 64bit pref flags 0x1]) + * - 'R' For decoded struct resource, e.g., [mem 0x0-0x1f 64bit pref] + * - 'r' For raw struct resource, e.g., [mem 0x0-0x1f flags 0x201] * - 'M' For a 6-byte MAC address, it prints the address in the * usual colon-separated hex notation * - 'm' For a 6-byte MAC address, it prints the hex address without colons @@ -881,6 +885,7 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, case 'S': return symbol_string(buf, end, ptr, spec, *fmt); case 'R': + case 'r': return resource_string(buf, end, ptr, spec, fmt); case 'M': /* Colon separated: 00:01:02:03:04:05 */ case 'm': /* Contiguous: 000102030405 */ -- cgit v1.2.2 From 5740afdb68abadc473fd5392df733558a58c1254 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:18 +0900 Subject: swiotlb: Add swiotlb_free() function swiotlb_free() function frees all allocated memory for swiotlb. We need to initialize swiotlb before IOMMU initialization (x86 and powerpc needs to allocate memory from bootmem allocator). If IOMMU initialization is successful, we need to free swiotlb resource (don't want to waste 64MB). Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com LKML-Reference: <1257849980-22640-8-git-send-email-fujita.tomonori@lab.ntt.co.jp> [ -v2: build fix for the !CONFIG_SWIOTLB case ] Signed-off-by: Ingo Molnar --- lib/swiotlb.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'lib') diff --git a/lib/swiotlb.c b/lib/swiotlb.c index ac25cd28e807..eee512b63f17 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -97,6 +97,8 @@ static phys_addr_t *io_tlb_orig_addr; */ static DEFINE_SPINLOCK(io_tlb_lock); +static int late_alloc; + static int __init setup_io_tlb_npages(char *str) { @@ -262,6 +264,8 @@ swiotlb_late_init_with_default_size(size_t default_size) swiotlb_print_info(bytes); + late_alloc = 1; + return 0; cleanup4: @@ -281,6 +285,32 @@ cleanup1: return -ENOMEM; } +void __init swiotlb_free(void) +{ + if (!io_tlb_overflow_buffer) + return; + + if (late_alloc) { + free_pages((unsigned long)io_tlb_overflow_buffer, + get_order(io_tlb_overflow)); + free_pages((unsigned long)io_tlb_orig_addr, + get_order(io_tlb_nslabs * sizeof(phys_addr_t))); + free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * + sizeof(int))); + free_pages((unsigned long)io_tlb_start, + get_order(io_tlb_nslabs << IO_TLB_SHIFT)); + } else { + free_bootmem_late(__pa(io_tlb_overflow_buffer), + io_tlb_overflow); + free_bootmem_late(__pa(io_tlb_orig_addr), + io_tlb_nslabs * sizeof(phys_addr_t)); + free_bootmem_late(__pa(io_tlb_list), + io_tlb_nslabs * sizeof(int)); + free_bootmem_late(__pa(io_tlb_start), + io_tlb_nslabs << IO_TLB_SHIFT); + } +} + static int is_swiotlb_buffer(phys_addr_t paddr) { return paddr >= virt_to_phys(io_tlb_start) && -- cgit v1.2.2 From ad32e8cb86e7894aac51c8963eaa9f36bb8a4e14 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:19 +0900 Subject: swiotlb: Defer swiotlb init printing, export swiotlb_print_info() This enables us to avoid printing swiotlb memory info when we initialize swiotlb. After swiotlb initialization, we could find that we don't need swiotlb. This patch removes the code to print swiotlb memory info in swiotlb_init() and exports the function to do that. Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com Cc: tony.luck@intel.com Cc: benh@kernel.crashing.org LKML-Reference: <1257849980-22640-9-git-send-email-fujita.tomonori@lab.ntt.co.jp> [ -v2: merge up conflict ] Signed-off-by: Ingo Molnar --- lib/swiotlb.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/swiotlb.c b/lib/swiotlb.c index eee512b63f17..0c12d7cce300 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -123,8 +123,9 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, return phys_to_dma(hwdev, virt_to_phys(address)); } -static void swiotlb_print_info(unsigned long bytes) +void swiotlb_print_info(void) { + unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; phys_addr_t pstart, pend; pstart = virt_to_phys(io_tlb_start); @@ -142,7 +143,7 @@ static void swiotlb_print_info(unsigned long bytes) * structures for the software IO TLB used to implement the DMA API. */ void __init -swiotlb_init_with_default_size(size_t default_size) +swiotlb_init_with_default_size(size_t default_size, int verbose) { unsigned long i, bytes; @@ -178,14 +179,14 @@ swiotlb_init_with_default_size(size_t default_size) io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); if (!io_tlb_overflow_buffer) panic("Cannot allocate SWIOTLB overflow buffer!\n"); - - swiotlb_print_info(bytes); + if (verbose) + swiotlb_print_info(); } void __init -swiotlb_init(void) +swiotlb_init(int verbose) { - swiotlb_init_with_default_size(64 * (1<<20)); /* default to 64MB */ + swiotlb_init_with_default_size(64 * (1<<20), verbose); /* default to 64MB */ } /* @@ -262,7 +263,7 @@ swiotlb_late_init_with_default_size(size_t default_size) if (!io_tlb_overflow_buffer) goto cleanup4; - swiotlb_print_info(bytes); + swiotlb_print_info(); late_alloc = 1; -- cgit v1.2.2 From 75f1cdf1dda92cae037ec848ae63690d91913eac Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:20 +0900 Subject: x86: Handle HW IOMMU initialization failure gracefully If HW IOMMU initialization fails (Intel VT-d often does this, typically due to BIOS bugs), we fall back to nommu. It doesn't work for the majority since nowadays we have more than 4GB memory so we must use swiotlb instead of nommu. The problem is that it's too late to initialize swiotlb when HW IOMMU initialization fails. We need to allocate swiotlb memory earlier from bootmem allocator. Chris explained the issue in detail: http://marc.info/?l=linux-kernel&m=125657444317079&w=2 The current x86 IOMMU initialization sequence is too complicated and handling the above issue makes it more hacky. This patch changes x86 IOMMU initialization sequence to handle the above issue cleanly. The new x86 IOMMU initialization sequence are: 1. we initialize the swiotlb (and setting swiotlb to 1) in the case of (max_pfn > MAX_DMA32_PFN && !no_iommu). dma_ops is set to swiotlb_dma_ops or nommu_dma_ops. if swiotlb usage is forced by the boot option, we finish here. 2. we call the detection functions of all the IOMMUs 3. the detection function sets x86_init.iommu.iommu_init to the IOMMU initialization function (so we can avoid calling the initialization functions of all the IOMMUs needlessly). 4. if the IOMMU initialization function doesn't need to swiotlb then sets swiotlb to zero (e.g. the initialization is sucessful). 5. if we find that swiotlb is set to zero, we free swiotlb resource. Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com LKML-Reference: <1257849980-22640-10-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- lib/swiotlb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 0c12d7cce300..e6755a0574fb 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -109,8 +109,10 @@ setup_io_tlb_npages(char *str) } if (*str == ',') ++str; - if (!strcmp(str, "force")) + if (!strcmp(str, "force")) { swiotlb_force = 1; + swiotlb = 1; + } return 1; } __setup("swiotlb=", setup_io_tlb_npages); -- cgit v1.2.2 From 83ac201b4f06eb8aeb7ac93cf162651ba30e0b28 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 3 Apr 2009 02:22:26 -0700 Subject: sysctl: Remove dead code from sysctl_check Now that the sys_sysctl is now a compatibility wrapper around /proc/sys we can remove much of sysctl_check and reduce it to a few remaining sanity checks. This completely decouples it from the binary sysctl system call. Little things like ensuring that the sysctl has not already been registered are all that remain. Signed-off-by: Eric W. Biederman --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 234ceb10861f..01f2d1139e9a 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -912,7 +912,7 @@ config LATENCYTOP config SYSCTL_SYSCALL_CHECK bool "Sysctl checks" - depends on SYSCTL_SYSCALL + depends on SYSCTL ---help--- sys_sysctl uses binary paths that have been found challenging to properly maintain and use. This enables checks that help -- cgit v1.2.2 From b18485e7acfe1a634615d1c628ef644c0d58d472 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 12 Nov 2009 00:03:28 +0900 Subject: swiotlb: Remove the swiotlb variable usage POWERPC doesn't expect it to be used. This fixes the linux-next build failure reported by Stephen Rothwell: lib/swiotlb.c: In function 'setup_io_tlb_npages': lib/swiotlb.c:114: error: 'swiotlb' undeclared (first use in this function) Reported-by: Stephen Rothwell Signed-off-by: FUJITA Tomonori Cc: peterz@infradead.org LKML-Reference: <20091112000258F.fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- lib/swiotlb.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/swiotlb.c b/lib/swiotlb.c index e6755a0574fb..795472d8ae24 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -109,10 +109,9 @@ setup_io_tlb_npages(char *str) } if (*str == ',') ++str; - if (!strcmp(str, "force")) { + if (!strcmp(str, "force")) swiotlb_force = 1; - swiotlb = 1; - } + return 1; } __setup("swiotlb=", setup_io_tlb_npages); -- cgit v1.2.2 From dc186ad741c12ae9ecac8b89e317ef706fdaf8f6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 Nov 2009 01:09:48 +0900 Subject: workqueue: Add debugobjects support Add debugobject support to track the life time of work_structs. While at it, remove duplicate definition of INIT_DELAYED_WORK_ON_STACK(). Signed-off-by: Thomas Gleixner Signed-off-by: Tejun Heo --- lib/Kconfig.debug | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 234ceb10861f..c91f0519d493 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -298,6 +298,14 @@ config DEBUG_OBJECTS_TIMERS timer routines to track the life time of timer objects and validate the timer operations. +config DEBUG_OBJECTS_WORK + bool "Debug work objects" + depends on DEBUG_OBJECTS + help + If you say Y here, additional code will be inserted into the + work queue routines to track the life time of work objects and + validate the work operations. + config DEBUG_OBJECTS_ENABLE_DEFAULT int "debug_objects bootup default value (0-1)" range 0 1 -- cgit v1.2.2 From 8bfb2f8e655b9d0c45fde679fcd5fd97e34513db Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 2 Dec 2009 12:10:16 -0800 Subject: rcu: Make RCU's CPU-stall detector be default The RCU_CPU_STALL_DETECTOR costs almost nothing and has located some bugs that might otherwise have been difficult to track down. Make it be default for the TREE RCU implementations. The vmlinux size impact is limited (on 64-bit x86 defconfig): text data bss dec hex filename 8440248 1260076 995588 10695912 a334e8 vmlinux.before 8440774 1260060 995588 10696422 a336e6 vmlinux.after +526 bytes - acceptable default cost. For RAM starved systems, TINY_RCU does not support CPU-stall detection and is much smaller, but then again it is a uniprocessor... Signed-off-by: Paul E. McKenney Acked-by: Lai Jiangshan Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12597846162906-git-send-email-> [ v2: added image size calculations to the changelog ] Signed-off-by: Ingo Molnar --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 891155817bc6..50e0e78259e7 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -749,7 +749,7 @@ config RCU_TORTURE_TEST_RUNNABLE config RCU_CPU_STALL_DETECTOR bool "Check for stalled CPUs delaying RCU grace periods" depends on TREE_RCU || TREE_PREEMPT_RCU - default n + default y help This option causes RCU to printk information on which CPUs are delaying the current grace period, but only when -- cgit v1.2.2 From 94e2bd688820aed72b4f8092f88c2ccf64e003de Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 16 Oct 2009 15:20:49 +0200 Subject: tree-wide: fix some typos and punctuation in comments fix some typos and punctuation in comments Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Jiri Kosina --- lib/idr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/idr.c b/lib/idr.c index 80ca9aca038b..1cac726c44bc 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -281,7 +281,7 @@ static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id) /** * idr_get_new_above - allocate new idr entry above or equal to a start id * @idp: idr handle - * @ptr: pointer you want associated with the ide + * @ptr: pointer you want associated with the id * @start_id: id to start search at * @id: pointer to the allocated handle * @@ -313,7 +313,7 @@ EXPORT_SYMBOL(idr_get_new_above); /** * idr_get_new - allocate new idr entry * @idp: idr handle - * @ptr: pointer you want associated with the ide + * @ptr: pointer you want associated with the id * @id: pointer to the allocated handle * * This is the allocate id function. It should be called with any -- cgit v1.2.2 From af901ca181d92aac3a7dc265144a9081a86d8f39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Goddard=20Rosa?= Date: Sat, 14 Nov 2009 13:09:05 -0200 Subject: tree-wide: fix assorted typos all over the place MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That is "success", "unknown", "through", "performance", "[re|un]mapping" , "access", "default", "reasonable", "[con]currently", "temperature" , "channel", "[un]used", "application", "example","hierarchy", "therefore" , "[over|under]flow", "contiguous", "threshold", "enough" and others. Signed-off-by: André Goddard Rosa Signed-off-by: Jiri Kosina --- lib/Kconfig.debug | 2 +- lib/decompress_bunzip2.c | 2 +- lib/dma-debug.c | 2 +- lib/swiotlb.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 234ceb10861f..456b2bc6b1ff 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -105,7 +105,7 @@ config DEBUG_SECTION_MISMATCH bool "Enable full Section mismatch analysis" depends on UNDEFINED # This option is on purpose disabled for now. - # It will be enabled when we are down to a resonable number + # It will be enabled when we are down to a reasonable number # of section mismatch warnings (< 10 for an allyesconfig build) help The section mismatch analysis checks if there are illegal diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index 600f473a5610..76074209f9a2 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -299,7 +299,7 @@ static int INIT get_next_block(struct bunzip_data *bd) again when using them (during symbol decoding).*/ base = hufGroup->base-1; limit = hufGroup->limit-1; - /* Calculate permute[]. Concurently, initialize + /* Calculate permute[]. Concurrently, initialize * temp[] and limit[]. */ pp = 0; for (i = minLen; i <= maxLen; i++) { diff --git a/lib/dma-debug.c b/lib/dma-debug.c index ce6b7eabf674..d9b08e0f7f55 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -259,7 +259,7 @@ static struct dma_debug_entry *hash_bucket_find(struct hash_bucket *bucket, * times. Without a hardware IOMMU this results in the * same device addresses being put into the dma-debug * hash multiple times too. This can result in false - * positives being reported. Therfore we implement a + * positives being reported. Therefore we implement a * best-fit algorithm here which returns the entry from * the hash which fits best to the reference value * instead of the first-fit. diff --git a/lib/swiotlb.c b/lib/swiotlb.c index ac25cd28e807..853907e45868 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -453,7 +453,7 @@ do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) /* * Return the buffer to the free list by setting the corresponding - * entries to indicate the number of contigous entries available. + * entries to indicate the number of contiguous entries available. * While returning the entries to the free list, we merge the entries * with slots below and above the pool being returned. */ -- cgit v1.2.2 From f01eb3640308c005d31b29d0a8bc2b7acb4e3f75 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 12 Dec 2009 14:46:33 -0800 Subject: [BKL] add 'might_sleep()' to the outermost lock taker As shown by the previous patch (6698e3472: "tty: Fix BKL taken under a spinlock bug introduced in the BKL split") the BKL removal is prone to some subtle issues, where removing the BKL in one place may in fact make a previously nested BKL call the new outer call, and then prone to nasty deadlocks with other spinlocks. In general, we should never take the BKL while we're holding a spinlock, so let's just add a "might_sleep()" to it (even though the BKL doesn't technically sleep - at least not yet), and we'll get nice warnings the next time this kind of problem happens during BKL removal. Acked-and-Tested-by: Thomas Gleixner Signed-off-by: Linus Torvalds --- lib/kernel_lock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c index 4ebfa5a164d7..5526b46aba94 100644 --- a/lib/kernel_lock.c +++ b/lib/kernel_lock.c @@ -122,8 +122,10 @@ void __lockfunc _lock_kernel(const char *func, const char *file, int line) trace_lock_kernel(func, file, line); - if (likely(!depth)) + if (likely(!depth)) { + might_sleep(); __lock_kernel(); + } current->lock_depth = depth; } -- cgit v1.2.2