diff options
| author | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2009-12-09 01:14:38 -0500 |
|---|---|---|
| committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2009-12-09 01:14:38 -0500 |
| commit | bcd6acd51f3d4d1ada201e9bc5c40a31d6d80c71 (patch) | |
| tree | 2f6dffd2d3e4dd67355a224de7e7a960335a92fd /lib | |
| parent | 11c34c7deaeeebcee342cbc35e1bb2a6711b2431 (diff) | |
| parent | 3ff6a468b45b5dfeb0e903e56f4eb27d34b2437c (diff) | |
Merge commit 'origin/master' into next
Conflicts:
include/linux/kvm.h
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/Kconfig | 3 | ||||
| -rw-r--r-- | lib/Kconfig.debug | 4 | ||||
| -rw-r--r-- | lib/Makefile | 2 | ||||
| -rw-r--r-- | lib/kernel_lock.c | 20 | ||||
| -rw-r--r-- | lib/lru_cache.c | 560 | ||||
| -rw-r--r-- | lib/radix-tree.c | 5 | ||||
| -rw-r--r-- | lib/ratelimit.c | 45 | ||||
| -rw-r--r-- | lib/swiotlb.c | 46 |
8 files changed, 648 insertions, 37 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index bb1326d3839c..1cfe51628e1b 100644 --- a/lib/Kconfig +++ b/lib/Kconfig | |||
| @@ -200,4 +200,7 @@ config NLATTR | |||
| 200 | config GENERIC_ATOMIC64 | 200 | config GENERIC_ATOMIC64 |
| 201 | bool | 201 | bool |
| 202 | 202 | ||
| 203 | config LRU_CACHE | ||
| 204 | tristate | ||
| 205 | |||
| 203 | endmenu | 206 | endmenu |
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 234ceb10861f..0b8f35918a36 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
| @@ -750,7 +750,7 @@ config RCU_TORTURE_TEST_RUNNABLE | |||
| 750 | config RCU_CPU_STALL_DETECTOR | 750 | config RCU_CPU_STALL_DETECTOR |
| 751 | bool "Check for stalled CPUs delaying RCU grace periods" | 751 | bool "Check for stalled CPUs delaying RCU grace periods" |
| 752 | depends on TREE_RCU || TREE_PREEMPT_RCU | 752 | depends on TREE_RCU || TREE_PREEMPT_RCU |
| 753 | default n | 753 | default y |
| 754 | help | 754 | help |
| 755 | This option causes RCU to printk information on which | 755 | This option causes RCU to printk information on which |
| 756 | CPUs are delaying the current grace period, but only when | 756 | CPUs are delaying the current grace period, but only when |
| @@ -912,7 +912,7 @@ config LATENCYTOP | |||
| 912 | 912 | ||
| 913 | config SYSCTL_SYSCALL_CHECK | 913 | config SYSCTL_SYSCALL_CHECK |
| 914 | bool "Sysctl checks" | 914 | bool "Sysctl checks" |
| 915 | depends on SYSCTL_SYSCALL | 915 | depends on SYSCTL |
| 916 | ---help--- | 916 | ---help--- |
| 917 | sys_sysctl uses binary paths that have been found challenging | 917 | sys_sysctl uses binary paths that have been found challenging |
| 918 | to properly maintain and use. This enables checks that help | 918 | to properly maintain and use. This enables checks that help |
diff --git a/lib/Makefile b/lib/Makefile index 2e78277eff9d..347ad8db29d3 100644 --- a/lib/Makefile +++ b/lib/Makefile | |||
| @@ -91,6 +91,8 @@ obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o | |||
| 91 | 91 | ||
| 92 | obj-$(CONFIG_NLATTR) += nlattr.o | 92 | obj-$(CONFIG_NLATTR) += nlattr.o |
| 93 | 93 | ||
| 94 | obj-$(CONFIG_LRU_CACHE) += lru_cache.o | ||
| 95 | |||
| 94 | obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o | 96 | obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o |
| 95 | 97 | ||
| 96 | obj-$(CONFIG_GENERIC_CSUM) += checksum.o | 98 | obj-$(CONFIG_GENERIC_CSUM) += checksum.o |
diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c index 39f1029e3525..4ebfa5a164d7 100644 --- a/lib/kernel_lock.c +++ b/lib/kernel_lock.c | |||
| @@ -5,10 +5,13 @@ | |||
| 5 | * relegated to obsolescence, but used by various less | 5 | * relegated to obsolescence, but used by various less |
| 6 | * important (or lazy) subsystems. | 6 | * important (or lazy) subsystems. |
| 7 | */ | 7 | */ |
| 8 | #include <linux/smp_lock.h> | ||
| 9 | #include <linux/module.h> | 8 | #include <linux/module.h> |
| 10 | #include <linux/kallsyms.h> | 9 | #include <linux/kallsyms.h> |
| 11 | #include <linux/semaphore.h> | 10 | #include <linux/semaphore.h> |
| 11 | #include <linux/smp_lock.h> | ||
| 12 | |||
| 13 | #define CREATE_TRACE_POINTS | ||
| 14 | #include <trace/events/bkl.h> | ||
| 12 | 15 | ||
| 13 | /* | 16 | /* |
| 14 | * The 'big kernel lock' | 17 | * The 'big kernel lock' |
| @@ -113,21 +116,26 @@ static inline void __unlock_kernel(void) | |||
| 113 | * This cannot happen asynchronously, so we only need to | 116 | * This cannot happen asynchronously, so we only need to |
| 114 | * worry about other CPU's. | 117 | * worry about other CPU's. |
| 115 | */ | 118 | */ |
| 116 | void __lockfunc lock_kernel(void) | 119 | void __lockfunc _lock_kernel(const char *func, const char *file, int line) |
| 117 | { | 120 | { |
| 118 | int depth = current->lock_depth+1; | 121 | int depth = current->lock_depth + 1; |
| 122 | |||
| 123 | trace_lock_kernel(func, file, line); | ||
| 124 | |||
| 119 | if (likely(!depth)) | 125 | if (likely(!depth)) |
| 120 | __lock_kernel(); | 126 | __lock_kernel(); |
| 121 | current->lock_depth = depth; | 127 | current->lock_depth = depth; |
| 122 | } | 128 | } |
| 123 | 129 | ||
| 124 | void __lockfunc unlock_kernel(void) | 130 | void __lockfunc _unlock_kernel(const char *func, const char *file, int line) |
| 125 | { | 131 | { |
| 126 | BUG_ON(current->lock_depth < 0); | 132 | BUG_ON(current->lock_depth < 0); |
| 127 | if (likely(--current->lock_depth < 0)) | 133 | if (likely(--current->lock_depth < 0)) |
| 128 | __unlock_kernel(); | 134 | __unlock_kernel(); |
| 135 | |||
| 136 | trace_unlock_kernel(func, file, line); | ||
| 129 | } | 137 | } |
| 130 | 138 | ||
| 131 | EXPORT_SYMBOL(lock_kernel); | 139 | EXPORT_SYMBOL(_lock_kernel); |
| 132 | EXPORT_SYMBOL(unlock_kernel); | 140 | EXPORT_SYMBOL(_unlock_kernel); |
| 133 | 141 | ||
diff --git a/lib/lru_cache.c b/lib/lru_cache.c new file mode 100644 index 000000000000..270de9d31b8c --- /dev/null +++ b/lib/lru_cache.c | |||
| @@ -0,0 +1,560 @@ | |||
| 1 | /* | ||
| 2 | lru_cache.c | ||
| 3 | |||
| 4 | This file is part of DRBD by Philipp Reisner and Lars Ellenberg. | ||
| 5 | |||
| 6 | Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. | ||
| 7 | Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>. | ||
| 8 | Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. | ||
| 9 | |||
| 10 | drbd is free software; you can redistribute it and/or modify | ||
| 11 | it under the terms of the GNU General Public License as published by | ||
| 12 | the Free Software Foundation; either version 2, or (at your option) | ||
| 13 | any later version. | ||
| 14 | |||
| 15 | drbd is distributed in the hope that it will be useful, | ||
| 16 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 18 | GNU General Public License for more details. | ||
| 19 | |||
| 20 | You should have received a copy of the GNU General Public License | ||
| 21 | along with drbd; see the file COPYING. If not, write to | ||
| 22 | the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
| 23 | |||
| 24 | */ | ||
| 25 | |||
| 26 | #include <linux/module.h> | ||
| 27 | #include <linux/bitops.h> | ||
| 28 | #include <linux/slab.h> | ||
| 29 | #include <linux/string.h> /* for memset */ | ||
| 30 | #include <linux/seq_file.h> /* for seq_printf */ | ||
| 31 | #include <linux/lru_cache.h> | ||
| 32 | |||
| 33 | MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, " | ||
| 34 | "Lars Ellenberg <lars@linbit.com>"); | ||
| 35 | MODULE_DESCRIPTION("lru_cache - Track sets of hot objects"); | ||
| 36 | MODULE_LICENSE("GPL"); | ||
| 37 | |||
| 38 | /* this is developers aid only. | ||
| 39 | * it catches concurrent access (lack of locking on the users part) */ | ||
| 40 | #define PARANOIA_ENTRY() do { \ | ||
| 41 | BUG_ON(!lc); \ | ||
| 42 | BUG_ON(!lc->nr_elements); \ | ||
| 43 | BUG_ON(test_and_set_bit(__LC_PARANOIA, &lc->flags)); \ | ||
| 44 | } while (0) | ||
| 45 | |||
| 46 | #define RETURN(x...) do { \ | ||
| 47 | clear_bit(__LC_PARANOIA, &lc->flags); \ | ||
| 48 | smp_mb__after_clear_bit(); return x ; } while (0) | ||
| 49 | |||
| 50 | /* BUG() if e is not one of the elements tracked by lc */ | ||
| 51 | #define PARANOIA_LC_ELEMENT(lc, e) do { \ | ||
| 52 | struct lru_cache *lc_ = (lc); \ | ||
| 53 | struct lc_element *e_ = (e); \ | ||
| 54 | unsigned i = e_->lc_index; \ | ||
| 55 | BUG_ON(i >= lc_->nr_elements); \ | ||
| 56 | BUG_ON(lc_->lc_element[i] != e_); } while (0) | ||
| 57 | |||
| 58 | /** | ||
| 59 | * lc_create - prepares to track objects in an active set | ||
| 60 | * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details | ||
| 61 | * @e_count: number of elements allowed to be active simultaneously | ||
| 62 | * @e_size: size of the tracked objects | ||
| 63 | * @e_off: offset to the &struct lc_element member in a tracked object | ||
| 64 | * | ||
| 65 | * Returns a pointer to a newly initialized struct lru_cache on success, | ||
| 66 | * or NULL on (allocation) failure. | ||
| 67 | */ | ||
| 68 | struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, | ||
| 69 | unsigned e_count, size_t e_size, size_t e_off) | ||
| 70 | { | ||
| 71 | struct hlist_head *slot = NULL; | ||
| 72 | struct lc_element **element = NULL; | ||
| 73 | struct lru_cache *lc; | ||
| 74 | struct lc_element *e; | ||
| 75 | unsigned cache_obj_size = kmem_cache_size(cache); | ||
| 76 | unsigned i; | ||
| 77 | |||
| 78 | WARN_ON(cache_obj_size < e_size); | ||
| 79 | if (cache_obj_size < e_size) | ||
| 80 | return NULL; | ||
| 81 | |||
| 82 | /* e_count too big; would probably fail the allocation below anyways. | ||
| 83 | * for typical use cases, e_count should be few thousand at most. */ | ||
| 84 | if (e_count > LC_MAX_ACTIVE) | ||
| 85 | return NULL; | ||
| 86 | |||
| 87 | slot = kzalloc(e_count * sizeof(struct hlist_head*), GFP_KERNEL); | ||
| 88 | if (!slot) | ||
| 89 | goto out_fail; | ||
| 90 | element = kzalloc(e_count * sizeof(struct lc_element *), GFP_KERNEL); | ||
| 91 | if (!element) | ||
| 92 | goto out_fail; | ||
| 93 | |||
| 94 | lc = kzalloc(sizeof(*lc), GFP_KERNEL); | ||
| 95 | if (!lc) | ||
| 96 | goto out_fail; | ||
| 97 | |||
| 98 | INIT_LIST_HEAD(&lc->in_use); | ||
| 99 | INIT_LIST_HEAD(&lc->lru); | ||
| 100 | INIT_LIST_HEAD(&lc->free); | ||
| 101 | |||
| 102 | lc->name = name; | ||
| 103 | lc->element_size = e_size; | ||
| 104 | lc->element_off = e_off; | ||
| 105 | lc->nr_elements = e_count; | ||
| 106 | lc->new_number = LC_FREE; | ||
| 107 | lc->lc_cache = cache; | ||
| 108 | lc->lc_element = element; | ||
| 109 | lc->lc_slot = slot; | ||
| 110 | |||
| 111 | /* preallocate all objects */ | ||
| 112 | for (i = 0; i < e_count; i++) { | ||
| 113 | void *p = kmem_cache_alloc(cache, GFP_KERNEL); | ||
| 114 | if (!p) | ||
| 115 | break; | ||
| 116 | memset(p, 0, lc->element_size); | ||
| 117 | e = p + e_off; | ||
| 118 | e->lc_index = i; | ||
| 119 | e->lc_number = LC_FREE; | ||
| 120 | list_add(&e->list, &lc->free); | ||
| 121 | element[i] = e; | ||
| 122 | } | ||
| 123 | if (i == e_count) | ||
| 124 | return lc; | ||
| 125 | |||
| 126 | /* else: could not allocate all elements, give up */ | ||
| 127 | for (i--; i; i--) { | ||
| 128 | void *p = element[i]; | ||
| 129 | kmem_cache_free(cache, p - e_off); | ||
| 130 | } | ||
| 131 | kfree(lc); | ||
| 132 | out_fail: | ||
| 133 | kfree(element); | ||
| 134 | kfree(slot); | ||
| 135 | return NULL; | ||
| 136 | } | ||
| 137 | |||
| 138 | void lc_free_by_index(struct lru_cache *lc, unsigned i) | ||
| 139 | { | ||
| 140 | void *p = lc->lc_element[i]; | ||
| 141 | WARN_ON(!p); | ||
| 142 | if (p) { | ||
| 143 | p -= lc->element_off; | ||
| 144 | kmem_cache_free(lc->lc_cache, p); | ||
| 145 | } | ||
| 146 | } | ||
| 147 | |||
| 148 | /** | ||
| 149 | * lc_destroy - frees memory allocated by lc_create() | ||
| 150 | * @lc: the lru cache to destroy | ||
| 151 | */ | ||
| 152 | void lc_destroy(struct lru_cache *lc) | ||
| 153 | { | ||
| 154 | unsigned i; | ||
| 155 | if (!lc) | ||
| 156 | return; | ||
| 157 | for (i = 0; i < lc->nr_elements; i++) | ||
| 158 | lc_free_by_index(lc, i); | ||
| 159 | kfree(lc->lc_element); | ||
| 160 | kfree(lc->lc_slot); | ||
| 161 | kfree(lc); | ||
| 162 | } | ||
| 163 | |||
| 164 | /** | ||
| 165 | * lc_reset - does a full reset for @lc and the hash table slots. | ||
| 166 | * @lc: the lru cache to operate on | ||
| 167 | * | ||
| 168 | * It is roughly the equivalent of re-allocating a fresh lru_cache object, | ||
| 169 | * basically a short cut to lc_destroy(lc); lc = lc_create(...); | ||
| 170 | */ | ||
| 171 | void lc_reset(struct lru_cache *lc) | ||
| 172 | { | ||
| 173 | unsigned i; | ||
| 174 | |||
| 175 | INIT_LIST_HEAD(&lc->in_use); | ||
| 176 | INIT_LIST_HEAD(&lc->lru); | ||
| 177 | INIT_LIST_HEAD(&lc->free); | ||
| 178 | lc->used = 0; | ||
| 179 | lc->hits = 0; | ||
| 180 | lc->misses = 0; | ||
| 181 | lc->starving = 0; | ||
| 182 | lc->dirty = 0; | ||
| 183 | lc->changed = 0; | ||
| 184 | lc->flags = 0; | ||
| 185 | lc->changing_element = NULL; | ||
| 186 | lc->new_number = LC_FREE; | ||
| 187 | memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements); | ||
| 188 | |||
| 189 | for (i = 0; i < lc->nr_elements; i++) { | ||
| 190 | struct lc_element *e = lc->lc_element[i]; | ||
| 191 | void *p = e; | ||
| 192 | p -= lc->element_off; | ||
| 193 | memset(p, 0, lc->element_size); | ||
| 194 | /* re-init it */ | ||
| 195 | e->lc_index = i; | ||
| 196 | e->lc_number = LC_FREE; | ||
| 197 | list_add(&e->list, &lc->free); | ||
| 198 | } | ||
| 199 | } | ||
| 200 | |||
| 201 | /** | ||
| 202 | * lc_seq_printf_stats - print stats about @lc into @seq | ||
| 203 | * @seq: the seq_file to print into | ||
| 204 | * @lc: the lru cache to print statistics of | ||
| 205 | */ | ||
| 206 | size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) | ||
| 207 | { | ||
| 208 | /* NOTE: | ||
| 209 | * total calls to lc_get are | ||
| 210 | * (starving + hits + misses) | ||
| 211 | * misses include "dirty" count (update from an other thread in | ||
| 212 | * progress) and "changed", when this in fact lead to an successful | ||
| 213 | * update of the cache. | ||
| 214 | */ | ||
| 215 | return seq_printf(seq, "\t%s: used:%u/%u " | ||
| 216 | "hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n", | ||
| 217 | lc->name, lc->used, lc->nr_elements, | ||
| 218 | lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed); | ||
| 219 | } | ||
| 220 | |||
| 221 | static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) | ||
| 222 | { | ||
| 223 | return lc->lc_slot + (enr % lc->nr_elements); | ||
| 224 | } | ||
| 225 | |||
| 226 | |||
| 227 | /** | ||
| 228 | * lc_find - find element by label, if present in the hash table | ||
| 229 | * @lc: The lru_cache object | ||
| 230 | * @enr: element number | ||
| 231 | * | ||
| 232 | * Returns the pointer to an element, if the element with the requested | ||
| 233 | * "label" or element number is present in the hash table, | ||
| 234 | * or NULL if not found. Does not change the refcnt. | ||
| 235 | */ | ||
| 236 | struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) | ||
| 237 | { | ||
| 238 | struct hlist_node *n; | ||
| 239 | struct lc_element *e; | ||
| 240 | |||
| 241 | BUG_ON(!lc); | ||
| 242 | BUG_ON(!lc->nr_elements); | ||
| 243 | hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) { | ||
| 244 | if (e->lc_number == enr) | ||
| 245 | return e; | ||
| 246 | } | ||
| 247 | return NULL; | ||
| 248 | } | ||
| 249 | |||
| 250 | /* returned element will be "recycled" immediately */ | ||
| 251 | static struct lc_element *lc_evict(struct lru_cache *lc) | ||
| 252 | { | ||
| 253 | struct list_head *n; | ||
| 254 | struct lc_element *e; | ||
| 255 | |||
| 256 | if (list_empty(&lc->lru)) | ||
| 257 | return NULL; | ||
| 258 | |||
| 259 | n = lc->lru.prev; | ||
| 260 | e = list_entry(n, struct lc_element, list); | ||
| 261 | |||
| 262 | PARANOIA_LC_ELEMENT(lc, e); | ||
| 263 | |||
| 264 | list_del(&e->list); | ||
| 265 | hlist_del(&e->colision); | ||
| 266 | return e; | ||
| 267 | } | ||
| 268 | |||
| 269 | /** | ||
| 270 | * lc_del - removes an element from the cache | ||
| 271 | * @lc: The lru_cache object | ||
| 272 | * @e: The element to remove | ||
| 273 | * | ||
| 274 | * @e must be unused (refcnt == 0). Moves @e from "lru" to "free" list, | ||
| 275 | * sets @e->enr to %LC_FREE. | ||
| 276 | */ | ||
| 277 | void lc_del(struct lru_cache *lc, struct lc_element *e) | ||
| 278 | { | ||
| 279 | PARANOIA_ENTRY(); | ||
| 280 | PARANOIA_LC_ELEMENT(lc, e); | ||
| 281 | BUG_ON(e->refcnt); | ||
| 282 | |||
| 283 | e->lc_number = LC_FREE; | ||
| 284 | hlist_del_init(&e->colision); | ||
| 285 | list_move(&e->list, &lc->free); | ||
| 286 | RETURN(); | ||
| 287 | } | ||
| 288 | |||
| 289 | static struct lc_element *lc_get_unused_element(struct lru_cache *lc) | ||
| 290 | { | ||
| 291 | struct list_head *n; | ||
| 292 | |||
| 293 | if (list_empty(&lc->free)) | ||
| 294 | return lc_evict(lc); | ||
| 295 | |||
| 296 | n = lc->free.next; | ||
| 297 | list_del(n); | ||
| 298 | return list_entry(n, struct lc_element, list); | ||
| 299 | } | ||
| 300 | |||
| 301 | static int lc_unused_element_available(struct lru_cache *lc) | ||
| 302 | { | ||
| 303 | if (!list_empty(&lc->free)) | ||
| 304 | return 1; /* something on the free list */ | ||
| 305 | if (!list_empty(&lc->lru)) | ||
| 306 | return 1; /* something to evict */ | ||
| 307 | |||
| 308 | return 0; | ||
| 309 | } | ||
| 310 | |||
| 311 | |||
| 312 | /** | ||
| 313 | * lc_get - get element by label, maybe change the active set | ||
| 314 | * @lc: the lru cache to operate on | ||
| 315 | * @enr: the label to look up | ||
| 316 | * | ||
| 317 | * Finds an element in the cache, increases its usage count, | ||
| 318 | * "touches" and returns it. | ||
| 319 | * | ||
| 320 | * In case the requested number is not present, it needs to be added to the | ||
| 321 | * cache. Therefore it is possible that an other element becomes evicted from | ||
| 322 | * the cache. In either case, the user is notified so he is able to e.g. keep | ||
| 323 | * a persistent log of the cache changes, and therefore the objects in use. | ||
| 324 | * | ||
| 325 | * Return values: | ||
| 326 | * NULL | ||
| 327 | * The cache was marked %LC_STARVING, | ||
| 328 | * or the requested label was not in the active set | ||
| 329 | * and a changing transaction is still pending (@lc was marked %LC_DIRTY). | ||
| 330 | * Or no unused or free element could be recycled (@lc will be marked as | ||
| 331 | * %LC_STARVING, blocking further lc_get() operations). | ||
| 332 | * | ||
| 333 | * pointer to the element with the REQUESTED element number. | ||
| 334 | * In this case, it can be used right away | ||
| 335 | * | ||
| 336 | * pointer to an UNUSED element with some different element number, | ||
| 337 | * where that different number may also be %LC_FREE. | ||
| 338 | * | ||
| 339 | * In this case, the cache is marked %LC_DIRTY (blocking further changes), | ||
| 340 | * and the returned element pointer is removed from the lru list and | ||
| 341 | * hash collision chains. The user now should do whatever housekeeping | ||
| 342 | * is necessary. | ||
| 343 | * Then he must call lc_changed(lc,element_pointer), to finish | ||
| 344 | * the change. | ||
| 345 | * | ||
| 346 | * NOTE: The user needs to check the lc_number on EACH use, so he recognizes | ||
| 347 | * any cache set change. | ||
| 348 | */ | ||
| 349 | struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) | ||
| 350 | { | ||
| 351 | struct lc_element *e; | ||
| 352 | |||
| 353 | PARANOIA_ENTRY(); | ||
| 354 | if (lc->flags & LC_STARVING) { | ||
| 355 | ++lc->starving; | ||
| 356 | RETURN(NULL); | ||
| 357 | } | ||
| 358 | |||
| 359 | e = lc_find(lc, enr); | ||
| 360 | if (e) { | ||
| 361 | ++lc->hits; | ||
| 362 | if (e->refcnt++ == 0) | ||
| 363 | lc->used++; | ||
| 364 | list_move(&e->list, &lc->in_use); /* Not evictable... */ | ||
| 365 | RETURN(e); | ||
| 366 | } | ||
| 367 | |||
| 368 | ++lc->misses; | ||
| 369 | |||
| 370 | /* In case there is nothing available and we can not kick out | ||
| 371 | * the LRU element, we have to wait ... | ||
| 372 | */ | ||
| 373 | if (!lc_unused_element_available(lc)) { | ||
| 374 | __set_bit(__LC_STARVING, &lc->flags); | ||
| 375 | RETURN(NULL); | ||
| 376 | } | ||
| 377 | |||
| 378 | /* it was not present in the active set. | ||
| 379 | * we are going to recycle an unused (or even "free") element. | ||
| 380 | * user may need to commit a transaction to record that change. | ||
| 381 | * we serialize on flags & TF_DIRTY */ | ||
| 382 | if (test_and_set_bit(__LC_DIRTY, &lc->flags)) { | ||
| 383 | ++lc->dirty; | ||
| 384 | RETURN(NULL); | ||
| 385 | } | ||
| 386 | |||
| 387 | e = lc_get_unused_element(lc); | ||
| 388 | BUG_ON(!e); | ||
| 389 | |||
| 390 | clear_bit(__LC_STARVING, &lc->flags); | ||
| 391 | BUG_ON(++e->refcnt != 1); | ||
| 392 | lc->used++; | ||
| 393 | |||
| 394 | lc->changing_element = e; | ||
| 395 | lc->new_number = enr; | ||
| 396 | |||
| 397 | RETURN(e); | ||
| 398 | } | ||
| 399 | |||
| 400 | /* similar to lc_get, | ||
| 401 | * but only gets a new reference on an existing element. | ||
| 402 | * you either get the requested element, or NULL. | ||
| 403 | * will be consolidated into one function. | ||
| 404 | */ | ||
| 405 | struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr) | ||
| 406 | { | ||
| 407 | struct lc_element *e; | ||
| 408 | |||
| 409 | PARANOIA_ENTRY(); | ||
| 410 | if (lc->flags & LC_STARVING) { | ||
| 411 | ++lc->starving; | ||
| 412 | RETURN(NULL); | ||
| 413 | } | ||
| 414 | |||
| 415 | e = lc_find(lc, enr); | ||
| 416 | if (e) { | ||
| 417 | ++lc->hits; | ||
| 418 | if (e->refcnt++ == 0) | ||
| 419 | lc->used++; | ||
| 420 | list_move(&e->list, &lc->in_use); /* Not evictable... */ | ||
| 421 | } | ||
| 422 | RETURN(e); | ||
| 423 | } | ||
| 424 | |||
| 425 | /** | ||
| 426 | * lc_changed - tell @lc that the change has been recorded | ||
| 427 | * @lc: the lru cache to operate on | ||
| 428 | * @e: the element pending label change | ||
| 429 | */ | ||
| 430 | void lc_changed(struct lru_cache *lc, struct lc_element *e) | ||
| 431 | { | ||
| 432 | PARANOIA_ENTRY(); | ||
| 433 | BUG_ON(e != lc->changing_element); | ||
| 434 | PARANOIA_LC_ELEMENT(lc, e); | ||
| 435 | ++lc->changed; | ||
| 436 | e->lc_number = lc->new_number; | ||
| 437 | list_add(&e->list, &lc->in_use); | ||
| 438 | hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number)); | ||
| 439 | lc->changing_element = NULL; | ||
| 440 | lc->new_number = LC_FREE; | ||
| 441 | clear_bit(__LC_DIRTY, &lc->flags); | ||
| 442 | smp_mb__after_clear_bit(); | ||
| 443 | RETURN(); | ||
| 444 | } | ||
| 445 | |||
| 446 | |||
| 447 | /** | ||
| 448 | * lc_put - give up refcnt of @e | ||
| 449 | * @lc: the lru cache to operate on | ||
| 450 | * @e: the element to put | ||
| 451 | * | ||
| 452 | * If refcnt reaches zero, the element is moved to the lru list, | ||
| 453 | * and a %LC_STARVING (if set) is cleared. | ||
| 454 | * Returns the new (post-decrement) refcnt. | ||
| 455 | */ | ||
| 456 | unsigned int lc_put(struct lru_cache *lc, struct lc_element *e) | ||
| 457 | { | ||
| 458 | PARANOIA_ENTRY(); | ||
| 459 | PARANOIA_LC_ELEMENT(lc, e); | ||
| 460 | BUG_ON(e->refcnt == 0); | ||
| 461 | BUG_ON(e == lc->changing_element); | ||
| 462 | if (--e->refcnt == 0) { | ||
| 463 | /* move it to the front of LRU. */ | ||
| 464 | list_move(&e->list, &lc->lru); | ||
| 465 | lc->used--; | ||
| 466 | clear_bit(__LC_STARVING, &lc->flags); | ||
| 467 | smp_mb__after_clear_bit(); | ||
| 468 | } | ||
| 469 | RETURN(e->refcnt); | ||
| 470 | } | ||
| 471 | |||
| 472 | /** | ||
| 473 | * lc_element_by_index | ||
| 474 | * @lc: the lru cache to operate on | ||
| 475 | * @i: the index of the element to return | ||
| 476 | */ | ||
| 477 | struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i) | ||
| 478 | { | ||
| 479 | BUG_ON(i >= lc->nr_elements); | ||
| 480 | BUG_ON(lc->lc_element[i] == NULL); | ||
| 481 | BUG_ON(lc->lc_element[i]->lc_index != i); | ||
| 482 | return lc->lc_element[i]; | ||
| 483 | } | ||
| 484 | |||
| 485 | /** | ||
| 486 | * lc_index_of | ||
| 487 | * @lc: the lru cache to operate on | ||
| 488 | * @e: the element to query for its index position in lc->element | ||
| 489 | */ | ||
| 490 | unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e) | ||
| 491 | { | ||
| 492 | PARANOIA_LC_ELEMENT(lc, e); | ||
| 493 | return e->lc_index; | ||
| 494 | } | ||
| 495 | |||
| 496 | /** | ||
| 497 | * lc_set - associate index with label | ||
| 498 | * @lc: the lru cache to operate on | ||
| 499 | * @enr: the label to set | ||
| 500 | * @index: the element index to associate label with. | ||
| 501 | * | ||
| 502 | * Used to initialize the active set to some previously recorded state. | ||
| 503 | */ | ||
| 504 | void lc_set(struct lru_cache *lc, unsigned int enr, int index) | ||
| 505 | { | ||
| 506 | struct lc_element *e; | ||
| 507 | |||
| 508 | if (index < 0 || index >= lc->nr_elements) | ||
| 509 | return; | ||
| 510 | |||
| 511 | e = lc_element_by_index(lc, index); | ||
| 512 | e->lc_number = enr; | ||
| 513 | |||
| 514 | hlist_del_init(&e->colision); | ||
| 515 | hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); | ||
| 516 | list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru); | ||
| 517 | } | ||
| 518 | |||
| 519 | /** | ||
| 520 | * lc_dump - Dump a complete LRU cache to seq in textual form. | ||
| 521 | * @lc: the lru cache to operate on | ||
| 522 | * @seq: the &struct seq_file pointer to seq_printf into | ||
| 523 | * @utext: user supplied "heading" or other info | ||
| 524 | * @detail: function pointer the user may provide to dump further details | ||
| 525 | * of the object the lc_element is embedded in. | ||
| 526 | */ | ||
| 527 | void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext, | ||
| 528 | void (*detail) (struct seq_file *, struct lc_element *)) | ||
| 529 | { | ||
| 530 | unsigned int nr_elements = lc->nr_elements; | ||
| 531 | struct lc_element *e; | ||
| 532 | int i; | ||
| 533 | |||
| 534 | seq_printf(seq, "\tnn: lc_number refcnt %s\n ", utext); | ||
| 535 | for (i = 0; i < nr_elements; i++) { | ||
| 536 | e = lc_element_by_index(lc, i); | ||
| 537 | if (e->lc_number == LC_FREE) { | ||
| 538 | seq_printf(seq, "\t%2d: FREE\n", i); | ||
| 539 | } else { | ||
| 540 | seq_printf(seq, "\t%2d: %4u %4u ", i, | ||
| 541 | e->lc_number, e->refcnt); | ||
| 542 | detail(seq, e); | ||
| 543 | } | ||
| 544 | } | ||
| 545 | } | ||
| 546 | |||
| 547 | EXPORT_SYMBOL(lc_create); | ||
| 548 | EXPORT_SYMBOL(lc_reset); | ||
| 549 | EXPORT_SYMBOL(lc_destroy); | ||
| 550 | EXPORT_SYMBOL(lc_set); | ||
| 551 | EXPORT_SYMBOL(lc_del); | ||
| 552 | EXPORT_SYMBOL(lc_try_get); | ||
| 553 | EXPORT_SYMBOL(lc_find); | ||
| 554 | EXPORT_SYMBOL(lc_get); | ||
| 555 | EXPORT_SYMBOL(lc_put); | ||
| 556 | EXPORT_SYMBOL(lc_changed); | ||
| 557 | EXPORT_SYMBOL(lc_element_by_index); | ||
| 558 | EXPORT_SYMBOL(lc_index_of); | ||
| 559 | EXPORT_SYMBOL(lc_seq_printf_stats); | ||
| 560 | EXPORT_SYMBOL(lc_seq_dump_details); | ||
diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 23abbd93cae1..92cdd9936e3d 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c | |||
| @@ -200,6 +200,9 @@ radix_tree_node_free(struct radix_tree_node *node) | |||
| 200 | * ensure that the addition of a single element in the tree cannot fail. On | 200 | * ensure that the addition of a single element in the tree cannot fail. On |
| 201 | * success, return zero, with preemption disabled. On error, return -ENOMEM | 201 | * success, return zero, with preemption disabled. On error, return -ENOMEM |
| 202 | * with preemption not disabled. | 202 | * with preemption not disabled. |
| 203 | * | ||
| 204 | * To make use of this facility, the radix tree must be initialised without | ||
| 205 | * __GFP_WAIT being passed to INIT_RADIX_TREE(). | ||
| 203 | */ | 206 | */ |
| 204 | int radix_tree_preload(gfp_t gfp_mask) | 207 | int radix_tree_preload(gfp_t gfp_mask) |
| 205 | { | 208 | { |
| @@ -543,7 +546,6 @@ out: | |||
| 543 | } | 546 | } |
| 544 | EXPORT_SYMBOL(radix_tree_tag_clear); | 547 | EXPORT_SYMBOL(radix_tree_tag_clear); |
| 545 | 548 | ||
| 546 | #ifndef __KERNEL__ /* Only the test harness uses this at present */ | ||
| 547 | /** | 549 | /** |
| 548 | * radix_tree_tag_get - get a tag on a radix tree node | 550 | * radix_tree_tag_get - get a tag on a radix tree node |
| 549 | * @root: radix tree root | 551 | * @root: radix tree root |
| @@ -606,7 +608,6 @@ int radix_tree_tag_get(struct radix_tree_root *root, | |||
| 606 | } | 608 | } |
| 607 | } | 609 | } |
| 608 | EXPORT_SYMBOL(radix_tree_tag_get); | 610 | EXPORT_SYMBOL(radix_tree_tag_get); |
| 609 | #endif | ||
| 610 | 611 | ||
| 611 | /** | 612 | /** |
| 612 | * radix_tree_next_hole - find the next hole (not-present entry) | 613 | * radix_tree_next_hole - find the next hole (not-present entry) |
diff --git a/lib/ratelimit.c b/lib/ratelimit.c index 26187edcc7ea..09f5ce1810dc 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c | |||
| @@ -7,15 +7,12 @@ | |||
| 7 | * parameter. Now every user can use their own standalone ratelimit_state. | 7 | * parameter. Now every user can use their own standalone ratelimit_state. |
| 8 | * | 8 | * |
| 9 | * This file is released under the GPLv2. | 9 | * This file is released under the GPLv2. |
| 10 | * | ||
| 11 | */ | 10 | */ |
| 12 | 11 | ||
| 13 | #include <linux/kernel.h> | 12 | #include <linux/ratelimit.h> |
| 14 | #include <linux/jiffies.h> | 13 | #include <linux/jiffies.h> |
| 15 | #include <linux/module.h> | 14 | #include <linux/module.h> |
| 16 | 15 | ||
| 17 | static DEFINE_SPINLOCK(ratelimit_lock); | ||
| 18 | |||
| 19 | /* | 16 | /* |
| 20 | * __ratelimit - rate limiting | 17 | * __ratelimit - rate limiting |
| 21 | * @rs: ratelimit_state data | 18 | * @rs: ratelimit_state data |
| @@ -23,35 +20,43 @@ static DEFINE_SPINLOCK(ratelimit_lock); | |||
| 23 | * This enforces a rate limit: not more than @rs->ratelimit_burst callbacks | 20 | * This enforces a rate limit: not more than @rs->ratelimit_burst callbacks |
| 24 | * in every @rs->ratelimit_jiffies | 21 | * in every @rs->ratelimit_jiffies |
| 25 | */ | 22 | */ |
| 26 | int __ratelimit(struct ratelimit_state *rs) | 23 | int ___ratelimit(struct ratelimit_state *rs, const char *func) |
| 27 | { | 24 | { |
| 28 | unsigned long flags; | 25 | unsigned long flags; |
| 26 | int ret; | ||
| 29 | 27 | ||
| 30 | if (!rs->interval) | 28 | if (!rs->interval) |
| 31 | return 1; | 29 | return 1; |
| 32 | 30 | ||
| 33 | spin_lock_irqsave(&ratelimit_lock, flags); | 31 | /* |
| 32 | * If we contend on this state's lock then almost | ||
| 33 | * by definition we are too busy to print a message, | ||
| 34 | * in addition to the one that will be printed by | ||
| 35 | * the entity that is holding the lock already: | ||
| 36 | */ | ||
| 37 | if (!spin_trylock_irqsave(&rs->lock, flags)) | ||
| 38 | return 1; | ||
| 39 | |||
| 34 | if (!rs->begin) | 40 | if (!rs->begin) |
| 35 | rs->begin = jiffies; | 41 | rs->begin = jiffies; |
| 36 | 42 | ||
| 37 | if (time_is_before_jiffies(rs->begin + rs->interval)) { | 43 | if (time_is_before_jiffies(rs->begin + rs->interval)) { |
| 38 | if (rs->missed) | 44 | if (rs->missed) |
| 39 | printk(KERN_WARNING "%s: %d callbacks suppressed\n", | 45 | printk(KERN_WARNING "%s: %d callbacks suppressed\n", |
| 40 | __func__, rs->missed); | 46 | func, rs->missed); |
| 41 | rs->begin = 0; | 47 | rs->begin = 0; |
| 42 | rs->printed = 0; | 48 | rs->printed = 0; |
| 43 | rs->missed = 0; | 49 | rs->missed = 0; |
| 44 | } | 50 | } |
| 45 | if (rs->burst && rs->burst > rs->printed) | 51 | if (rs->burst && rs->burst > rs->printed) { |
| 46 | goto print; | 52 | rs->printed++; |
| 47 | 53 | ret = 1; | |
| 48 | rs->missed++; | 54 | } else { |
| 49 | spin_unlock_irqrestore(&ratelimit_lock, flags); | 55 | rs->missed++; |
| 50 | return 0; | 56 | ret = 0; |
| 57 | } | ||
| 58 | spin_unlock_irqrestore(&rs->lock, flags); | ||
| 51 | 59 | ||
| 52 | print: | 60 | return ret; |
| 53 | rs->printed++; | ||
| 54 | spin_unlock_irqrestore(&ratelimit_lock, flags); | ||
| 55 | return 1; | ||
| 56 | } | 61 | } |
| 57 | EXPORT_SYMBOL(__ratelimit); | 62 | EXPORT_SYMBOL(___ratelimit); |
diff --git a/lib/swiotlb.c b/lib/swiotlb.c index ac25cd28e807..795472d8ae24 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c | |||
| @@ -97,6 +97,8 @@ static phys_addr_t *io_tlb_orig_addr; | |||
| 97 | */ | 97 | */ |
| 98 | static DEFINE_SPINLOCK(io_tlb_lock); | 98 | static DEFINE_SPINLOCK(io_tlb_lock); |
| 99 | 99 | ||
| 100 | static int late_alloc; | ||
| 101 | |||
| 100 | static int __init | 102 | static int __init |
| 101 | setup_io_tlb_npages(char *str) | 103 | setup_io_tlb_npages(char *str) |
| 102 | { | 104 | { |
| @@ -109,6 +111,7 @@ setup_io_tlb_npages(char *str) | |||
| 109 | ++str; | 111 | ++str; |
| 110 | if (!strcmp(str, "force")) | 112 | if (!strcmp(str, "force")) |
| 111 | swiotlb_force = 1; | 113 | swiotlb_force = 1; |
| 114 | |||
| 112 | return 1; | 115 | return 1; |
| 113 | } | 116 | } |
| 114 | __setup("swiotlb=", setup_io_tlb_npages); | 117 | __setup("swiotlb=", setup_io_tlb_npages); |
| @@ -121,8 +124,9 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, | |||
| 121 | return phys_to_dma(hwdev, virt_to_phys(address)); | 124 | return phys_to_dma(hwdev, virt_to_phys(address)); |
| 122 | } | 125 | } |
| 123 | 126 | ||
| 124 | static void swiotlb_print_info(unsigned long bytes) | 127 | void swiotlb_print_info(void) |
| 125 | { | 128 | { |
| 129 | unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; | ||
| 126 | phys_addr_t pstart, pend; | 130 | phys_addr_t pstart, pend; |
| 127 | 131 | ||
| 128 | pstart = virt_to_phys(io_tlb_start); | 132 | pstart = virt_to_phys(io_tlb_start); |
| @@ -140,7 +144,7 @@ static void swiotlb_print_info(unsigned long bytes) | |||
| 140 | * structures for the software IO TLB used to implement the DMA API. | 144 | * structures for the software IO TLB used to implement the DMA API. |
| 141 | */ | 145 | */ |
| 142 | void __init | 146 | void __init |
| 143 | swiotlb_init_with_default_size(size_t default_size) | 147 | swiotlb_init_with_default_size(size_t default_size, int verbose) |
| 144 | { | 148 | { |
| 145 | unsigned long i, bytes; | 149 | unsigned long i, bytes; |
| 146 | 150 | ||
| @@ -176,14 +180,14 @@ swiotlb_init_with_default_size(size_t default_size) | |||
| 176 | io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); | 180 | io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); |
| 177 | if (!io_tlb_overflow_buffer) | 181 | if (!io_tlb_overflow_buffer) |
| 178 | panic("Cannot allocate SWIOTLB overflow buffer!\n"); | 182 | panic("Cannot allocate SWIOTLB overflow buffer!\n"); |
| 179 | 183 | if (verbose) | |
| 180 | swiotlb_print_info(bytes); | 184 | swiotlb_print_info(); |
| 181 | } | 185 | } |
| 182 | 186 | ||
| 183 | void __init | 187 | void __init |
| 184 | swiotlb_init(void) | 188 | swiotlb_init(int verbose) |
| 185 | { | 189 | { |
| 186 | swiotlb_init_with_default_size(64 * (1<<20)); /* default to 64MB */ | 190 | swiotlb_init_with_default_size(64 * (1<<20), verbose); /* default to 64MB */ |
| 187 | } | 191 | } |
| 188 | 192 | ||
| 189 | /* | 193 | /* |
| @@ -260,7 +264,9 @@ swiotlb_late_init_with_default_size(size_t default_size) | |||
| 260 | if (!io_tlb_overflow_buffer) | 264 | if (!io_tlb_overflow_buffer) |
| 261 | goto cleanup4; | 265 | goto cleanup4; |
| 262 | 266 | ||
| 263 | swiotlb_print_info(bytes); | 267 | swiotlb_print_info(); |
| 268 | |||
| 269 | late_alloc = 1; | ||
| 264 | 270 | ||
| 265 | return 0; | 271 | return 0; |
| 266 | 272 | ||
| @@ -281,6 +287,32 @@ cleanup1: | |||
| 281 | return -ENOMEM; | 287 | return -ENOMEM; |
| 282 | } | 288 | } |
| 283 | 289 | ||
| 290 | void __init swiotlb_free(void) | ||
| 291 | { | ||
| 292 | if (!io_tlb_overflow_buffer) | ||
| 293 | return; | ||
| 294 | |||
| 295 | if (late_alloc) { | ||
| 296 | free_pages((unsigned long)io_tlb_overflow_buffer, | ||
| 297 | get_order(io_tlb_overflow)); | ||
| 298 | free_pages((unsigned long)io_tlb_orig_addr, | ||
| 299 | get_order(io_tlb_nslabs * sizeof(phys_addr_t))); | ||
| 300 | free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * | ||
| 301 | sizeof(int))); | ||
| 302 | free_pages((unsigned long)io_tlb_start, | ||
| 303 | get_order(io_tlb_nslabs << IO_TLB_SHIFT)); | ||
| 304 | } else { | ||
| 305 | free_bootmem_late(__pa(io_tlb_overflow_buffer), | ||
| 306 | io_tlb_overflow); | ||
| 307 | free_bootmem_late(__pa(io_tlb_orig_addr), | ||
| 308 | io_tlb_nslabs * sizeof(phys_addr_t)); | ||
| 309 | free_bootmem_late(__pa(io_tlb_list), | ||
| 310 | io_tlb_nslabs * sizeof(int)); | ||
| 311 | free_bootmem_late(__pa(io_tlb_start), | ||
| 312 | io_tlb_nslabs << IO_TLB_SHIFT); | ||
| 313 | } | ||
| 314 | } | ||
| 315 | |||
| 284 | static int is_swiotlb_buffer(phys_addr_t paddr) | 316 | static int is_swiotlb_buffer(phys_addr_t paddr) |
| 285 | { | 317 | { |
| 286 | return paddr >= virt_to_phys(io_tlb_start) && | 318 | return paddr >= virt_to_phys(io_tlb_start) && |
