Diffstat (limited to 'lib')
-rw-r--r--  lib/Kconfig          |   3
-rw-r--r--  lib/Kconfig.debug    |   3
-rw-r--r--  lib/Makefile         |   4
-rw-r--r--  lib/average.c        |  61
-rw-r--r--  lib/debug_locks.c    |   2
-rw-r--r--  lib/dynamic_debug.c  |   9
-rw-r--r--  lib/hexdump.c        |  16
-rw-r--r--  lib/kref.c           |  30
-rw-r--r--  lib/nlattr.c         |  22
-rw-r--r--  lib/percpu_counter.c |   8
-rw-r--r--  lib/radix-tree.c     |  83
-rw-r--r--  lib/timerqueue.c     | 107
12 files changed, 298 insertions(+), 50 deletions(-)
diff --git a/lib/Kconfig b/lib/Kconfig
index fa9bf2c06199..3116aa631af6 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -210,4 +210,7 @@ config GENERIC_ATOMIC64
 config LRU_CACHE
 	tristate
 
+config AVERAGE
+	bool
+
 endmenu
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 28b42b9274d0..2d05adb98401 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -173,7 +173,8 @@ config LOCKUP_DETECTOR
 	  An NMI is generated every 60 seconds or so to check for hardlockups.
 
 config HARDLOCKUP_DETECTOR
-	def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI
+	def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \
+		 !ARCH_HAS_NMI_WATCHDOG
 
 config BOOTPARAM_SOFTLOCKUP_PANIC
 	bool "Panic (Reboot) On Soft Lockups"
diff --git a/lib/Makefile b/lib/Makefile
index e6a3763b8212..d7b6e30a3a1e 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -8,7 +8,7 @@ KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
 endif
 
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
-	 rbtree.o radix-tree.o dump_stack.o \
+	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
 	 idr.o int_sqrt.o extable.o prio_tree.o \
 	 sha1.o irq_regs.o reciprocal_div.o argv_split.o \
 	 proportions.o prio_heap.o ratelimit.o show_mem.o \
@@ -106,6 +106,8 @@ obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o
 
 obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o
 
+obj-$(CONFIG_AVERAGE) += average.o
+
 hostprogs-y := gen_crc32table
 clean-files := crc32table.h
 
diff --git a/lib/average.c b/lib/average.c
new file mode 100644
index 000000000000..5576c2841496
--- /dev/null
+++ b/lib/average.c
@@ -0,0 +1,61 @@
+/*
+ * lib/average.c
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/average.h>
+#include <linux/bug.h>
+#include <linux/log2.h>
+
+/**
+ * DOC: Exponentially Weighted Moving Average (EWMA)
+ *
+ * These are generic functions for calculating Exponentially Weighted Moving
+ * Averages (EWMA). We keep a structure with the EWMA parameters and a scaled
+ * up internal representation of the average value to prevent rounding errors.
+ * The factor for scaling up and the exponential weight (or decay rate) have
+ * to be specified through the init function. The structure should not be
+ * accessed directly but only through the helper functions.
+ */
+
+/**
+ * ewma_init() - Initialize EWMA parameters
+ * @avg: Average structure
+ * @factor: Factor to use for the scaled up internal value. The maximum value
+ *	of averages can be ULONG_MAX/(factor*weight). For performance reasons
+ *	factor has to be a power of 2.
+ * @weight: Exponential weight, or decay rate. This defines how fast the
+ *	influence of older values decreases. For performance reasons weight
+ *	has to be a power of 2.
+ *
+ * Initialize the EWMA parameters for a given struct ewma @avg.
+ */
+void ewma_init(struct ewma *avg, unsigned long factor, unsigned long weight)
+{
+	WARN_ON(!is_power_of_2(weight) || !is_power_of_2(factor));
+
+	avg->weight = ilog2(weight);
+	avg->factor = ilog2(factor);
+	avg->internal = 0;
+}
+EXPORT_SYMBOL(ewma_init);
+
+/**
+ * ewma_add() - Exponentially weighted moving average (EWMA)
+ * @avg: Average structure
+ * @val: Current value
+ *
+ * Add a sample to the average.
+ */
+struct ewma *ewma_add(struct ewma *avg, unsigned long val)
+{
+	avg->internal = avg->internal ?
+		(((avg->internal << avg->weight) - avg->internal) +
+			(val << avg->factor)) >> avg->weight :
+		(val << avg->factor);
+	return avg;
+}
+EXPORT_SYMBOL(ewma_add);
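
For context, a minimal caller sketch for the new EWMA helpers (not part of the patch; the factor/weight values and the signal-strength use case are illustrative, and ewma_read() is the accessor that lives in linux/average.h rather than in this file):

#include <linux/average.h>

static struct ewma sig_avg;

static void sig_avg_example(void)
{
	unsigned long avg;

	/* factor 1024, weight 8: both must be powers of 2 */
	ewma_init(&sig_avg, 1024, 8);

	ewma_add(&sig_avg, 42);		/* fold in samples as they arrive */
	ewma_add(&sig_avg, 40);

	avg = ewma_read(&sig_avg);	/* descaled average of the samples */
}
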
diff --git a/lib/debug_locks.c b/lib/debug_locks.c
index 5bf0020b9248..b1c177307677 100644
--- a/lib/debug_locks.c
+++ b/lib/debug_locks.c
@@ -8,7 +8,6 @@
  *
  * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
  */
-#include <linux/kernel.h>
 #include <linux/rwsem.h>
 #include <linux/mutex.h>
 #include <linux/module.h>
@@ -39,7 +38,6 @@ int debug_locks_off(void)
 {
 	if (__debug_locks_off()) {
 		if (!debug_locks_silent) {
-			oops_in_progress = 1;
 			console_verbose();
 			return 1;
 		}
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 3094318bfea7..b335acb43be2 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -141,11 +141,10 @@ static void ddebug_change(const struct ddebug_query *query,
 			else if (!dp->flags)
 				dt->num_enabled++;
 			dp->flags = newflags;
-			if (newflags) {
-				jump_label_enable(&dp->enabled);
-			} else {
-				jump_label_disable(&dp->enabled);
-			}
+			if (newflags)
+				dp->enabled = 1;
+			else
+				dp->enabled = 0;
 			if (verbose)
 				printk(KERN_INFO
 					"ddebug: changed %s:%d [%s]%s %s\n",
diff --git a/lib/hexdump.c b/lib/hexdump.c
index 5d7a4802c562..b66b2bd67952 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -34,6 +34,22 @@ int hex_to_bin(char ch)
 EXPORT_SYMBOL(hex_to_bin);
 
 /**
+ * hex2bin - convert an ASCII hexadecimal string to its binary representation
+ * @dst: binary result
+ * @src: ASCII hexadecimal string
+ * @count: result length
+ */
+void hex2bin(u8 *dst, const char *src, size_t count)
+{
+	while (count--) {
+		*dst = hex_to_bin(*src++) << 4;
+		*dst += hex_to_bin(*src++);
+		dst++;
+	}
+}
+EXPORT_SYMBOL(hex2bin);
+
+/**
  * hex_dump_to_buffer - convert a blob of data to "hex ASCII" in memory
  * @buf: data blob to dump
  * @len: number of bytes in the @buf
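
A quick caller sketch for hex2bin() (illustrative, not from the patch). Note that the function does no validation, so the caller must supply 2*count valid hex digits:

	u8 mac[6];

	/* parses "001122334455" into { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 } */
	hex2bin(mac, "001122334455", sizeof(mac));
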
diff --git a/lib/kref.c b/lib/kref.c
index d3d227a08a4b..3efb882b11db 100644
--- a/lib/kref.c
+++ b/lib/kref.c
@@ -62,6 +62,36 @@ int kref_put(struct kref *kref, void (*release)(struct kref *kref))
 	return 0;
 }
 
+
+/**
+ * kref_sub - subtract a number of refcounts for object.
+ * @kref: object.
+ * @count: Number of refcounts to subtract.
+ * @release: pointer to the function that will clean up the object when the
+ *	     last reference to the object is released.
+ *	     This pointer is required, and it is not acceptable to pass kfree
+ *	     in as this function.
+ *
+ * Subtract @count from the refcount, and if 0, call release().
+ * Return 1 if the object was removed, otherwise return 0. Beware, if this
+ * function returns 0, you still cannot count on the kref remaining in
+ * memory. Only use the return value to check whether the kref is now gone,
+ * never whether it is still present.
+ */
+int kref_sub(struct kref *kref, unsigned int count,
+	     void (*release)(struct kref *kref))
+{
+	WARN_ON(release == NULL);
+	WARN_ON(release == (void (*)(struct kref *))kfree);
+
+	if (atomic_sub_and_test((int) count, &kref->refcount)) {
+		release(kref);
+		return 1;
+	}
+	return 0;
+}
+
 EXPORT_SYMBOL(kref_init);
 EXPORT_SYMBOL(kref_get);
 EXPORT_SYMBOL(kref_put);
+EXPORT_SYMBOL(kref_sub);
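
A hypothetical caller sketch for kref_sub() (the struct and names are invented for illustration): dropping a batch of references in one atomic operation instead of looping over kref_put():

struct my_obj {
	struct kref ref;
	/* ... payload ... */
};

static void my_obj_release(struct kref *kref)
{
	struct my_obj *obj = container_of(kref, struct my_obj, ref);

	kfree(obj);	/* a real release function; kref_sub() WARNs if
			 * kfree itself is passed as @release */
}

	/* drop 'n' references at once */
	if (kref_sub(&obj->ref, n, my_obj_release))
		pr_debug("last reference dropped, object freed\n");
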
diff --git a/lib/nlattr.c b/lib/nlattr.c
index c4706eb98d3d..00e8a02681a6 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -15,7 +15,7 @@
 #include <linux/types.h>
 #include <net/netlink.h>
 
-static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = {
+static const u16 nla_attr_minlen[NLA_TYPE_MAX+1] = {
 	[NLA_U8]	= sizeof(u8),
 	[NLA_U16]	= sizeof(u16),
 	[NLA_U32]	= sizeof(u32),
@@ -23,7 +23,7 @@ static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = {
 	[NLA_NESTED]	= NLA_HDRLEN,
 };
 
-static int validate_nla(struct nlattr *nla, int maxtype,
+static int validate_nla(const struct nlattr *nla, int maxtype,
 			const struct nla_policy *policy)
 {
 	const struct nla_policy *pt;
@@ -115,10 +115,10 @@ static int validate_nla(struct nlattr *nla, int maxtype,
  *
  * Returns 0 on success or a negative error code.
  */
-int nla_validate(struct nlattr *head, int len, int maxtype,
+int nla_validate(const struct nlattr *head, int len, int maxtype,
 		 const struct nla_policy *policy)
 {
-	struct nlattr *nla;
+	const struct nlattr *nla;
 	int rem, err;
 
 	nla_for_each_attr(nla, head, len, rem) {
@@ -173,10 +173,10 @@ nla_policy_len(const struct nla_policy *p, int n)
  *
  * Returns 0 on success or a negative error code.
  */
-int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len,
-	      const struct nla_policy *policy)
+int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head,
+	      int len, const struct nla_policy *policy)
 {
-	struct nlattr *nla;
+	const struct nlattr *nla;
 	int rem, err;
 
 	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
@@ -191,7 +191,7 @@ int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len,
 					goto errout;
 			}
 
-			tb[type] = nla;
+			tb[type] = (struct nlattr *)nla;
 		}
 	}
 
@@ -212,14 +212,14 @@ errout:
  *
  * Returns the first attribute in the stream matching the specified type.
  */
-struct nlattr *nla_find(struct nlattr *head, int len, int attrtype)
+struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype)
 {
-	struct nlattr *nla;
+	const struct nlattr *nla;
 	int rem;
 
 	nla_for_each_attr(nla, head, len, rem)
 		if (nla_type(nla) == attrtype)
-			return nla;
+			return (struct nlattr *)nla;
 
 	return NULL;
 }
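
The const-qualification is source-compatible for callers; a typical nla_parse() call still looks like the sketch below (MY_ATTR_MAX, MY_ATTR_FOO and my_policy are invented names; nlmsg_attrdata()/nlmsg_attrlen() are the existing net/netlink.h accessors):

	struct nlattr *tb[MY_ATTR_MAX + 1];
	int err;

	err = nla_parse(tb, MY_ATTR_MAX, nlmsg_attrdata(nlh, 0),
			nlmsg_attrlen(nlh, 0), my_policy);
	if (err < 0)
		return err;
	if (tb[MY_ATTR_FOO])	/* attribute present? */
		foo = nla_get_u32(tb[MY_ATTR_FOO]);
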
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 604678d7d06d..28f2c33c6b53 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -72,18 +72,16 @@ EXPORT_SYMBOL(percpu_counter_set);
 void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch)
 {
 	s64 count;
-	s32 *pcount;
 
 	preempt_disable();
-	pcount = this_cpu_ptr(fbc->counters);
-	count = *pcount + amount;
+	count = __this_cpu_read(*fbc->counters) + amount;
 	if (count >= batch || count <= -batch) {
 		spin_lock(&fbc->lock);
 		fbc->count += count;
-		*pcount = 0;
+		__this_cpu_write(*fbc->counters, 0);
 		spin_unlock(&fbc->lock);
 	} else {
-		*pcount = count;
+		__this_cpu_write(*fbc->counters, count);
 	}
 	preempt_enable();
 }
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 6f412ab4c24f..5086bb962b4d 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -82,6 +82,16 @@ struct radix_tree_preload {
 };
 static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
 
+static inline void *ptr_to_indirect(void *ptr)
+{
+	return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR);
+}
+
+static inline void *indirect_to_ptr(void *ptr)
+{
+	return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR);
+}
+
 static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
 {
 	return root->gfp_mask & __GFP_BITS_MASK;
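
These helpers work because radix tree nodes are allocated with better than single-byte alignment, leaving bit 0 (RADIX_TREE_INDIRECT_PTR) free as a tag. A sketch of the round trip (illustrative only):

	void *tagged = ptr_to_indirect(node);		/* sets bit 0 */

	BUG_ON(!radix_tree_is_indirect_ptr(tagged));	/* bit 0 now set */
	BUG_ON(indirect_to_ptr(tagged) != node);	/* and strips cleanly */
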
@@ -265,7 +275,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
 			return -ENOMEM;
 
 		/* Increase the height.  */
-		node->slots[0] = radix_tree_indirect_to_ptr(root->rnode);
+		node->slots[0] = indirect_to_ptr(root->rnode);
 
 		/* Propagate the aggregated tag info into the new root */
 		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
@@ -276,7 +286,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
 		newheight = root->height+1;
 		node->height = newheight;
 		node->count = 1;
-		node = radix_tree_ptr_to_indirect(node);
+		node = ptr_to_indirect(node);
 		rcu_assign_pointer(root->rnode, node);
 		root->height = newheight;
 	} while (height > root->height);
@@ -309,7 +319,7 @@ int radix_tree_insert(struct radix_tree_root *root,
 			return error;
 	}
 
-	slot = radix_tree_indirect_to_ptr(root->rnode);
+	slot = indirect_to_ptr(root->rnode);
 
 	height = root->height;
 	shift = (height-1) * RADIX_TREE_MAP_SHIFT;
@@ -325,8 +335,7 @@ int radix_tree_insert(struct radix_tree_root *root,
 				rcu_assign_pointer(node->slots[offset], slot);
 				node->count++;
 			} else
-				rcu_assign_pointer(root->rnode,
-					radix_tree_ptr_to_indirect(slot));
+				rcu_assign_pointer(root->rnode, ptr_to_indirect(slot));
 		}
 
 		/* Go a level down */
@@ -374,7 +383,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
 			return NULL;
 		return is_slot ? (void *)&root->rnode : node;
 	}
-	node = radix_tree_indirect_to_ptr(node);
+	node = indirect_to_ptr(node);
 
 	height = node->height;
 	if (index > radix_tree_maxindex(height))
@@ -393,7 +402,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
 		height--;
 	} while (height > 0);
 
-	return is_slot ? (void *)slot:node;
+	return is_slot ? (void *)slot : indirect_to_ptr(node);
 }
 
 /**
@@ -455,7 +464,7 @@ void *radix_tree_tag_set(struct radix_tree_root *root,
 	height = root->height;
 	BUG_ON(index > radix_tree_maxindex(height));
 
-	slot = radix_tree_indirect_to_ptr(root->rnode);
+	slot = indirect_to_ptr(root->rnode);
 	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
 
 	while (height > 0) {
@@ -509,7 +518,7 @@ void *radix_tree_tag_clear(struct radix_tree_root *root,
 
 	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
 	pathp->node = NULL;
-	slot = radix_tree_indirect_to_ptr(root->rnode);
+	slot = indirect_to_ptr(root->rnode);
 
 	while (height > 0) {
 		int offset;
@@ -579,7 +588,7 @@ int radix_tree_tag_get(struct radix_tree_root *root,
 
 	if (!radix_tree_is_indirect_ptr(node))
 		return (index == 0);
-	node = radix_tree_indirect_to_ptr(node);
+	node = indirect_to_ptr(node);
 
 	height = node->height;
 	if (index > radix_tree_maxindex(height))
@@ -666,7 +675,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
 	}
 
 	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
-	slot = radix_tree_indirect_to_ptr(root->rnode);
+	slot = indirect_to_ptr(root->rnode);
 
 	/*
 	 * we fill the path from (root->height - 2) to 0, leaving the index at
@@ -897,7 +906,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
 		results[0] = node;
 		return 1;
 	}
-	node = radix_tree_indirect_to_ptr(node);
+	node = indirect_to_ptr(node);
 
 	max_index = radix_tree_maxindex(node->height);
 
@@ -916,7 +925,8 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
 			slot = *(((void ***)results)[ret + i]);
 			if (!slot)
 				continue;
-			results[ret + nr_found] = rcu_dereference_raw(slot);
+			results[ret + nr_found] =
+				indirect_to_ptr(rcu_dereference_raw(slot));
 			nr_found++;
 		}
 		ret += nr_found;
@@ -965,7 +975,7 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
 		results[0] = (void **)&root->rnode;
 		return 1;
 	}
-	node = radix_tree_indirect_to_ptr(node);
+	node = indirect_to_ptr(node);
 
 	max_index = radix_tree_maxindex(node->height);
 
@@ -1090,7 +1100,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
 		results[0] = node;
 		return 1;
 	}
-	node = radix_tree_indirect_to_ptr(node);
+	node = indirect_to_ptr(node);
 
 	max_index = radix_tree_maxindex(node->height);
 
@@ -1109,7 +1119,8 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
 			slot = *(((void ***)results)[ret + i]);
 			if (!slot)
 				continue;
-			results[ret + nr_found] = rcu_dereference_raw(slot);
+			results[ret + nr_found] =
+				indirect_to_ptr(rcu_dereference_raw(slot));
 			nr_found++;
 		}
 		ret += nr_found;
@@ -1159,7 +1170,7 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
 		results[0] = (void **)&root->rnode;
 		return 1;
 	}
-	node = radix_tree_indirect_to_ptr(node);
+	node = indirect_to_ptr(node);
 
 	max_index = radix_tree_maxindex(node->height);
 
@@ -1195,7 +1206,7 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
 		void *newptr;
 
 		BUG_ON(!radix_tree_is_indirect_ptr(to_free));
-		to_free = radix_tree_indirect_to_ptr(to_free);
+		to_free = indirect_to_ptr(to_free);
 
 		/*
 		 * The candidate node has more than one child, or its child
@@ -1208,16 +1219,39 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
 
 		/*
 		 * We don't need rcu_assign_pointer(), since we are simply
-		 * moving the node from one part of the tree to another. If
-		 * it was safe to dereference the old pointer to it
+		 * moving the node from one part of the tree to another: if it
+		 * was safe to dereference the old pointer to it
 		 * (to_free->slots[0]), it will be safe to dereference the new
-		 * one (root->rnode).
+		 * one (root->rnode) as far as dependent read barriers go.
 		 */
 		newptr = to_free->slots[0];
 		if (root->height > 1)
-			newptr = radix_tree_ptr_to_indirect(newptr);
+			newptr = ptr_to_indirect(newptr);
 		root->rnode = newptr;
 		root->height--;
+
+		/*
+		 * We have a dilemma here. The node's slot[0] must not be
+		 * NULLed in case there are concurrent lookups expecting to
+		 * find the item. However if this was a bottom-level node,
+		 * then it may be subject to the slot pointer being visible
+		 * to callers dereferencing it. If the item corresponding to
+		 * slot[0] is subsequently deleted, these callers would expect
+		 * their slot to become empty sooner or later.
+		 *
+		 * For example, lockless pagecache will look up a slot, deref
+		 * the page pointer, and if the page has a zero refcount it
+		 * means it was concurrently deleted from pagecache, so try
+		 * the deref again. Fortunately there is already a requirement
+		 * for logic to retry the entire slot lookup -- the indirect
+		 * pointer problem (replacing the direct root node with an
+		 * indirect pointer also results in a stale slot). So tag the
+		 * slot as indirect to force callers to retry.
+		 */
+		if (root->height == 0)
+			*((unsigned long *)&to_free->slots[0]) |=
+						RADIX_TREE_INDIRECT_PTR;
+
 		radix_tree_node_free(to_free);
 	}
 }
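
The lookup-side retry this comment relies on looks roughly like the sketch below (radix_tree_deref_slot() and radix_tree_deref_retry() are the header-side helpers from the same patch series; the pagecache names are illustrative):

	rcu_read_lock();
repeat:
	page = radix_tree_deref_slot(pagep);
	if (unlikely(!page))
		goto out;
	if (radix_tree_deref_retry(page))
		goto repeat;	/* stale slot: the tree was shrunk or grown,
				 * so redo the whole lookup */
	/* ... take a reference, then re-check the slot ... */
out:
	rcu_read_unlock();
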
@@ -1254,7 +1288,7 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
 		root->rnode = NULL;
 		goto out;
 	}
-	slot = radix_tree_indirect_to_ptr(slot);
+	slot = indirect_to_ptr(slot);
 
 	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
 	pathp->node = NULL;
@@ -1296,8 +1330,7 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
 		radix_tree_node_free(to_free);
 
 		if (pathp->node->count) {
-			if (pathp->node ==
-					radix_tree_indirect_to_ptr(root->rnode))
+			if (pathp->node == indirect_to_ptr(root->rnode))
 				radix_tree_shrink(root);
 			goto out;
 		}
diff --git a/lib/timerqueue.c b/lib/timerqueue.c
new file mode 100644
index 000000000000..e3a1050e6820
--- /dev/null
+++ b/lib/timerqueue.c
@@ -0,0 +1,107 @@
+/*
+ * Generic Timer-queue
+ *
+ * Manages a simple queue of timers, ordered by expiration time.
+ * Uses rbtrees for quick list adds and expiration.
+ *
+ * NOTE: All of the following functions need to be serialized
+ * to avoid races. No locking is done by this library code.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/timerqueue.h>
+#include <linux/rbtree.h>
+#include <linux/module.h>
+
+/**
+ * timerqueue_add - Adds timer to timerqueue.
+ *
+ * @head: head of timerqueue
+ * @node: timer node to be added
+ *
+ * Adds the timer node to the timerqueue, sorted by the
+ * node's expires value.
+ */
+void timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
+{
+	struct rb_node **p = &head->head.rb_node;
+	struct rb_node *parent = NULL;
+	struct timerqueue_node *ptr;
+
+	/* Make sure we don't add nodes that are already added */
+	WARN_ON_ONCE(!RB_EMPTY_NODE(&node->node));
+
+	while (*p) {
+		parent = *p;
+		ptr = rb_entry(parent, struct timerqueue_node, node);
+		if (node->expires.tv64 < ptr->expires.tv64)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+	rb_link_node(&node->node, parent, p);
+	rb_insert_color(&node->node, &head->head);
+
+	if (!head->next || node->expires.tv64 < head->next->expires.tv64)
+		head->next = node;
+}
+EXPORT_SYMBOL_GPL(timerqueue_add);
+
+/**
+ * timerqueue_del - Removes a timer from the timerqueue.
+ *
+ * @head: head of timerqueue
+ * @node: timer node to be removed
+ *
+ * Removes the timer node from the timerqueue.
+ */
+void timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node)
+{
+	WARN_ON_ONCE(RB_EMPTY_NODE(&node->node));
+
+	/* update next pointer */
+	if (head->next == node) {
+		struct rb_node *rbn = rb_next(&node->node);
+
+		head->next = rbn ?
+			rb_entry(rbn, struct timerqueue_node, node) : NULL;
+	}
+	rb_erase(&node->node, &head->head);
+	RB_CLEAR_NODE(&node->node);
+}
+EXPORT_SYMBOL_GPL(timerqueue_del);
+
+/**
+ * timerqueue_iterate_next - Returns the timer after the provided timer
+ *
+ * @node: Pointer to a timer.
+ *
+ * Provides the timer that is after the given node. This is used, when
+ * necessary, to iterate through the list of timers in a timer list
+ * without modifying the list.
+ */
+struct timerqueue_node *timerqueue_iterate_next(struct timerqueue_node *node)
+{
+	struct rb_node *next;
+
+	if (!node)
+		return NULL;
+	next = rb_next(&node->node);
+	if (!next)
+		return NULL;
+	return container_of(next, struct timerqueue_node, node);
+}
+EXPORT_SYMBOL_GPL(timerqueue_iterate_next);
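
A minimal usage sketch for the new API (serialization is the caller's job, per the header comment; timerqueue_init_head(), timerqueue_init() and timerqueue_getnext() are the static inlines that live in linux/timerqueue.h, not shown in this diff):

#include <linux/ktime.h>
#include <linux/timerqueue.h>

static struct timerqueue_head queue;
static struct timerqueue_node timer;

static void timerqueue_example(void)
{
	struct timerqueue_node *next;

	timerqueue_init_head(&queue);
	timerqueue_init(&timer);

	timer.expires = ktime_set(1, 0);	/* expires in 1s */
	timerqueue_add(&queue, &timer);

	next = timerqueue_getnext(&queue);	/* O(1): cached head->next */
	if (next)
		timerqueue_del(&queue, next);
}
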