author	Linus Torvalds <torvalds@linux-foundation.org>	2016-05-21 01:31:33 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-05-21 01:31:33 -0400
commit	5469dc270cd44c451590d40c031e6a71c1f637e8 (patch)
tree	5ca6330c2d754dbe82bfa75964a7f828f364e48f /lib
parent	2f37dd131c5d3a2eac21cd5baf80658b1b02a8ac (diff)
parent	ea9b50133ffebbd580cb5cd0aa222784d7a2fcb1 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton:
 - the rest of MM
 - KASAN updates
 - procfs updates
 - exit, fork updates
 - printk updates
 - lib/ updates
 - radix-tree testsuite updates
 - checkpatch updates
 - kprobes updates
 - a few other misc bits

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (162 commits)
  samples/kprobes: print out the symbol name for the hooks
  samples/kprobes: add a new module parameter
  kprobes: add the "tls" argument for j_do_fork
  init/main.c: simplify initcall_blacklisted()
  fs/efs/super.c: fix return value
  checkpatch: improve --git <commit-count> shortcut
  checkpatch: reduce number of `git log` calls with --git
  checkpatch: add support to check already applied git commits
  checkpatch: add --list-types to show message types to show or ignore
  checkpatch: advertise the --fix and --fix-inplace options more
  checkpatch: whine about ACCESS_ONCE
  checkpatch: add test for keywords not starting on tabstops
  checkpatch: improve CONSTANT_COMPARISON test for structure members
  checkpatch: add PREFER_IS_ENABLED test
  lib/GCD.c: use binary GCD algorithm instead of Euclidean
  radix-tree: free up the bottom bit of exceptional entries for reuse
  dax: move RADIX_DAX_ definitions to dax.c
  radix-tree: make radix_tree_descend() more useful
  radix-tree: introduce radix_tree_replace_clear_tags()
  radix-tree: tidy up __radix_tree_create()
  ...
Diffstat (limited to 'lib')
-rw-r--r--	lib/Kconfig	3
-rw-r--r--	lib/gcd.c	77
-rw-r--r--	lib/nmi_backtrace.c	89
-rw-r--r--	lib/radix-tree.c	933
-rw-r--r--	lib/strncpy_from_user.c	2
-rw-r--r--	lib/test_kasan.c	69
-rw-r--r--	lib/uuid.c	91
-rw-r--r--	lib/vsprintf.c	21
8 files changed, 699 insertions, 586 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index 61d55bd0ed89..d79909dc01ec 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -362,6 +362,9 @@ config INTERVAL_TREE
362 362
363 for more information. 363 for more information.
364 364
365config RADIX_TREE_MULTIORDER
366 bool
367
365config ASSOCIATIVE_ARRAY 368config ASSOCIATIVE_ARRAY
366 bool 369 bool
367 help 370 help
diff --git a/lib/gcd.c b/lib/gcd.c
index 3657f129d7b8..135ee6407a5e 100644
--- a/lib/gcd.c
+++ b/lib/gcd.c
@@ -2,20 +2,77 @@
2#include <linux/gcd.h> 2#include <linux/gcd.h>
3#include <linux/export.h> 3#include <linux/export.h>
4 4
5/* Greatest common divisor */ 5/*
6 * This implements the binary GCD algorithm. (Often attributed to Stein,
7 * but as Knuth has noted, appears in a first-century Chinese math text.)
8 *
9 * This is faster than the division-based algorithm even on x86, which
10 * has decent hardware division.
11 */
12
13#if !defined(CONFIG_CPU_NO_EFFICIENT_FFS) && !defined(CPU_NO_EFFICIENT_FFS)
14
15/* If __ffs is available, the even/odd algorithm benchmarks slower. */
6unsigned long gcd(unsigned long a, unsigned long b) 16unsigned long gcd(unsigned long a, unsigned long b)
7{ 17{
8 unsigned long r; 18 unsigned long r = a | b;
19
20 if (!a || !b)
21 return r;
9 22
10 if (a < b) 23 b >>= __ffs(b);
11 swap(a, b); 24 if (b == 1)
25 return r & -r;
12 26
13 if (!b) 27 for (;;) {
14 return a; 28 a >>= __ffs(a);
15 while ((r = a % b) != 0) { 29 if (a == 1)
16 a = b; 30 return r & -r;
17 b = r; 31 if (a == b)
32 return a << __ffs(r);
33
34 if (a < b)
35 swap(a, b);
36 a -= b;
18 } 37 }
19 return b;
20} 38}
39
40#else
41
42/* If normalization is done by loops, the even/odd algorithm is a win. */
43unsigned long gcd(unsigned long a, unsigned long b)
44{
45 unsigned long r = a | b;
46
47 if (!a || !b)
48 return r;
49
50 /* Isolate lsbit of r */
51 r &= -r;
52
53 while (!(b & r))
54 b >>= 1;
55 if (b == r)
56 return r;
57
58 for (;;) {
59 while (!(a & r))
60 a >>= 1;
61 if (a == r)
62 return r;
63 if (a == b)
64 return a;
65
66 if (a < b)
67 swap(a, b);
68 a -= b;
69 a >>= 1;
70 if (a & r)
71 a += b;
72 a >>= 1;
73 }
74}
75
76#endif
77
21EXPORT_SYMBOL_GPL(gcd); 78EXPORT_SYMBOL_GPL(gcd);
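The new fast path above is the binary (Stein) GCD: make both operands odd, repeatedly subtract the smaller from the larger (the difference of two odd numbers is even, so it can be re-reduced), and restore the shared power of two at the end. Below is a minimal user-space sketch of the same algorithm for experimentation outside the kernel; it assumes GCC or Clang so that __builtin_ctzl can stand in for the kernel's __ffs(), and the gcd_binary name and test values are made up for the illustration.

/* --- illustrative example, not part of the patch --- */
#include <stdio.h>

static unsigned long gcd_binary(unsigned long a, unsigned long b)
{
	unsigned long r = a | b;		/* records the common low bit, handles zero operands */

	if (!a || !b)
		return r;			/* gcd(x, 0) == x */

	b >>= __builtin_ctzl(b);		/* make b odd */
	if (b == 1)
		return r & -r;			/* gcd is just the common power of two */

	for (;;) {
		a >>= __builtin_ctzl(a);	/* make a odd */
		if (a == 1)
			return r & -r;
		if (a == b)
			return a << __builtin_ctzl(r);	/* restore the common power of two */

		if (a < b) {			/* keep a >= b so the subtraction stays unsigned-safe */
			unsigned long t = a;
			a = b;
			b = t;
		}
		a -= b;				/* even result; the next iteration strips the twos again */
	}
}

int main(void)
{
	printf("%lu\n", gcd_binary(48, 180));	/* prints 12 */
	return 0;
}
/* --- end example --- */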
diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
index 6019c53c669e..26caf51cc238 100644
--- a/lib/nmi_backtrace.c
+++ b/lib/nmi_backtrace.c
@@ -16,33 +16,14 @@
16#include <linux/delay.h> 16#include <linux/delay.h>
17#include <linux/kprobes.h> 17#include <linux/kprobes.h>
18#include <linux/nmi.h> 18#include <linux/nmi.h>
19#include <linux/seq_buf.h>
20 19
21#ifdef arch_trigger_all_cpu_backtrace 20#ifdef arch_trigger_all_cpu_backtrace
22/* For reliability, we're prepared to waste bits here. */ 21/* For reliability, we're prepared to waste bits here. */
23static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; 22static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
24static cpumask_t printtrace_mask;
25
26#define NMI_BUF_SIZE 4096
27
28struct nmi_seq_buf {
29 unsigned char buffer[NMI_BUF_SIZE];
30 struct seq_buf seq;
31};
32
33/* Safe printing in NMI context */
34static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq);
35 23
36/* "in progress" flag of arch_trigger_all_cpu_backtrace */ 24/* "in progress" flag of arch_trigger_all_cpu_backtrace */
37static unsigned long backtrace_flag; 25static unsigned long backtrace_flag;
38 26
39static void print_seq_line(struct nmi_seq_buf *s, int start, int end)
40{
41 const char *buf = s->buffer + start;
42
43 printk("%.*s", (end - start) + 1, buf);
44}
45
46/* 27/*
47 * When raise() is called it will be is passed a pointer to the 28 * When raise() is called it will be is passed a pointer to the
48 * backtrace_mask. Architectures that call nmi_cpu_backtrace() 29 * backtrace_mask. Architectures that call nmi_cpu_backtrace()
@@ -52,8 +33,7 @@ static void print_seq_line(struct nmi_seq_buf *s, int start, int end)
52void nmi_trigger_all_cpu_backtrace(bool include_self, 33void nmi_trigger_all_cpu_backtrace(bool include_self,
53 void (*raise)(cpumask_t *mask)) 34 void (*raise)(cpumask_t *mask))
54{ 35{
55 struct nmi_seq_buf *s; 36 int i, this_cpu = get_cpu();
56 int i, cpu, this_cpu = get_cpu();
57 37
58 if (test_and_set_bit(0, &backtrace_flag)) { 38 if (test_and_set_bit(0, &backtrace_flag)) {
59 /* 39 /*
@@ -68,17 +48,6 @@ void nmi_trigger_all_cpu_backtrace(bool include_self,
68 if (!include_self) 48 if (!include_self)
69 cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask)); 49 cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask));
70 50
71 cpumask_copy(&printtrace_mask, to_cpumask(backtrace_mask));
72
73 /*
74 * Set up per_cpu seq_buf buffers that the NMIs running on the other
75 * CPUs will write to.
76 */
77 for_each_cpu(cpu, to_cpumask(backtrace_mask)) {
78 s = &per_cpu(nmi_print_seq, cpu);
79 seq_buf_init(&s->seq, s->buffer, NMI_BUF_SIZE);
80 }
81
82 if (!cpumask_empty(to_cpumask(backtrace_mask))) { 51 if (!cpumask_empty(to_cpumask(backtrace_mask))) {
83 pr_info("Sending NMI to %s CPUs:\n", 52 pr_info("Sending NMI to %s CPUs:\n",
84 (include_self ? "all" : "other")); 53 (include_self ? "all" : "other"));
@@ -94,73 +63,25 @@ void nmi_trigger_all_cpu_backtrace(bool include_self,
94 } 63 }
95 64
96 /* 65 /*
97 * Now that all the NMIs have triggered, we can dump out their 66 * Force flush any remote buffers that might be stuck in IRQ context
98 * back traces safely to the console. 67 * and therefore could not run their irq_work.
99 */ 68 */
100 for_each_cpu(cpu, &printtrace_mask) { 69 printk_nmi_flush();
101 int len, last_i = 0;
102 70
103 s = &per_cpu(nmi_print_seq, cpu); 71 clear_bit_unlock(0, &backtrace_flag);
104 len = seq_buf_used(&s->seq);
105 if (!len)
106 continue;
107
108 /* Print line by line. */
109 for (i = 0; i < len; i++) {
110 if (s->buffer[i] == '\n') {
111 print_seq_line(s, last_i, i);
112 last_i = i + 1;
113 }
114 }
115 /* Check if there was a partial line. */
116 if (last_i < len) {
117 print_seq_line(s, last_i, len - 1);
118 pr_cont("\n");
119 }
120 }
121
122 clear_bit(0, &backtrace_flag);
123 smp_mb__after_atomic();
124 put_cpu(); 72 put_cpu();
125} 73}
126 74
127/*
128 * It is not safe to call printk() directly from NMI handlers.
129 * It may be fine if the NMI detected a lock up and we have no choice
130 * but to do so, but doing a NMI on all other CPUs to get a back trace
131 * can be done with a sysrq-l. We don't want that to lock up, which
132 * can happen if the NMI interrupts a printk in progress.
133 *
134 * Instead, we redirect the vprintk() to this nmi_vprintk() that writes
135 * the content into a per cpu seq_buf buffer. Then when the NMIs are
136 * all done, we can safely dump the contents of the seq_buf to a printk()
137 * from a non NMI context.
138 */
139static int nmi_vprintk(const char *fmt, va_list args)
140{
141 struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq);
142 unsigned int len = seq_buf_used(&s->seq);
143
144 seq_buf_vprintf(&s->seq, fmt, args);
145 return seq_buf_used(&s->seq) - len;
146}
147
148bool nmi_cpu_backtrace(struct pt_regs *regs) 75bool nmi_cpu_backtrace(struct pt_regs *regs)
149{ 76{
150 int cpu = smp_processor_id(); 77 int cpu = smp_processor_id();
151 78
152 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { 79 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
153 printk_func_t printk_func_save = this_cpu_read(printk_func);
154
155 /* Replace printk to write into the NMI seq */
156 this_cpu_write(printk_func, nmi_vprintk);
157 pr_warn("NMI backtrace for cpu %d\n", cpu); 80 pr_warn("NMI backtrace for cpu %d\n", cpu);
158 if (regs) 81 if (regs)
159 show_regs(regs); 82 show_regs(regs);
160 else 83 else
161 dump_stack(); 84 dump_stack();
162 this_cpu_write(printk_func, printk_func_save);
163
164 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); 85 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
165 return true; 86 return true;
166 } 87 }
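The block removed above had re-implemented, privately in nmi_backtrace.c, a generic pattern: when printing directly is unsafe (here, from NMI context), format each message into a per-CPU buffer and flush the buffers later from a safe context. The replacement simply leans on printk's own NMI-safe buffers and printk_nmi_flush(). A minimal user-space analogy of the deferral pattern, in case the idea is easier to see without the kernel plumbing (deferred_printf/deferred_flush and the single static buffer are invented for this sketch and stand in for the per-CPU seq_buf and the flush step):

/* --- illustrative example, not part of the patch --- */
#include <stdarg.h>
#include <stdio.h>

static char deferred_buf[4096];
static size_t deferred_len;

/* Called where printing directly would be unsafe (the nmi_vprintk analogue). */
static int deferred_printf(const char *fmt, ...)
{
	size_t room = sizeof(deferred_buf) - deferred_len;
	va_list args;
	int n;

	va_start(args, fmt);
	n = vsnprintf(deferred_buf + deferred_len, room, fmt, args);
	va_end(args);

	if (n > 0)
		deferred_len += ((size_t)n < room) ? (size_t)n : room - 1;
	return n;
}

/* Called later from a safe context (the printk_nmi_flush() analogue). */
static void deferred_flush(void)
{
	fwrite(deferred_buf, 1, deferred_len, stdout);
	deferred_len = 0;
}

int main(void)
{
	deferred_printf("NMI backtrace for cpu %d\n", 0);
	deferred_flush();			/* the output appears only here */
	return 0;
}
/* --- end example --- */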
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 1624c4117961..8b7d8459bb9d 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -4,6 +4,8 @@
4 * Copyright (C) 2005 SGI, Christoph Lameter 4 * Copyright (C) 2005 SGI, Christoph Lameter
5 * Copyright (C) 2006 Nick Piggin 5 * Copyright (C) 2006 Nick Piggin
6 * Copyright (C) 2012 Konstantin Khlebnikov 6 * Copyright (C) 2012 Konstantin Khlebnikov
7 * Copyright (C) 2016 Intel, Matthew Wilcox
8 * Copyright (C) 2016 Intel, Ross Zwisler
7 * 9 *
8 * This program is free software; you can redistribute it and/or 10 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as 11 * modify it under the terms of the GNU General Public License as
@@ -37,12 +39,6 @@
37 39
38 40
39/* 41/*
40 * The height_to_maxindex array needs to be one deeper than the maximum
41 * path as height 0 holds only 1 entry.
42 */
43static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly;
44
45/*
46 * Radix tree node cache. 42 * Radix tree node cache.
47 */ 43 */
48static struct kmem_cache *radix_tree_node_cachep; 44static struct kmem_cache *radix_tree_node_cachep;
@@ -64,20 +60,58 @@ static struct kmem_cache *radix_tree_node_cachep;
64 * Per-cpu pool of preloaded nodes 60 * Per-cpu pool of preloaded nodes
65 */ 61 */
66struct radix_tree_preload { 62struct radix_tree_preload {
67 int nr; 63 unsigned nr;
68 /* nodes->private_data points to next preallocated node */ 64 /* nodes->private_data points to next preallocated node */
69 struct radix_tree_node *nodes; 65 struct radix_tree_node *nodes;
70}; 66};
71static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; 67static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
72 68
73static inline void *ptr_to_indirect(void *ptr) 69static inline void *node_to_entry(void *ptr)
70{
71 return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
72}
73
74#define RADIX_TREE_RETRY node_to_entry(NULL)
75
76#ifdef CONFIG_RADIX_TREE_MULTIORDER
77/* Sibling slots point directly to another slot in the same node */
78static inline bool is_sibling_entry(struct radix_tree_node *parent, void *node)
79{
80 void **ptr = node;
81 return (parent->slots <= ptr) &&
82 (ptr < parent->slots + RADIX_TREE_MAP_SIZE);
83}
84#else
85static inline bool is_sibling_entry(struct radix_tree_node *parent, void *node)
74{ 86{
75 return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR); 87 return false;
76} 88}
89#endif
77 90
78static inline void *indirect_to_ptr(void *ptr) 91static inline unsigned long get_slot_offset(struct radix_tree_node *parent,
92 void **slot)
79{ 93{
80 return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); 94 return slot - parent->slots;
95}
96
97static unsigned int radix_tree_descend(struct radix_tree_node *parent,
98 struct radix_tree_node **nodep, unsigned long index)
99{
100 unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK;
101 void **entry = rcu_dereference_raw(parent->slots[offset]);
102
103#ifdef CONFIG_RADIX_TREE_MULTIORDER
104 if (radix_tree_is_internal_node(entry)) {
105 unsigned long siboff = get_slot_offset(parent, entry);
106 if (siboff < RADIX_TREE_MAP_SIZE) {
107 offset = siboff;
108 entry = rcu_dereference_raw(parent->slots[offset]);
109 }
110 }
111#endif
112
113 *nodep = (void *)entry;
114 return offset;
81} 115}
82 116
83static inline gfp_t root_gfp_mask(struct radix_tree_root *root) 117static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
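The helpers added in this hunk all hinge on one trick: radix_tree_node allocations are word aligned, so the low bit of a slot value is free to mean "this entry points at another node (or at a sibling slot) rather than at user data". node_to_entry() sets that bit before the pointer is stored, and the entry_to_node()/radix_tree_is_internal_node() counterparts used throughout the rest of the patch strip and test it. A standalone sketch of the tagging scheme (plain C, illustrative only; the INTERNAL_NODE_TAG constant and helper names are made up, not the kernel's):

/* --- illustrative example, not part of the patch --- */
#include <stdbool.h>
#include <stdint.h>

#define INTERNAL_NODE_TAG	1UL	/* usable because nodes are at least word aligned */

static inline void *tag_internal(void *node)
{
	return (void *)((uintptr_t)node | INTERNAL_NODE_TAG);
}

static inline bool is_internal(const void *entry)
{
	return ((uintptr_t)entry & INTERNAL_NODE_TAG) != 0;
}

static inline void *untag_internal(void *entry)
{
	return (void *)((uintptr_t)entry & ~INTERNAL_NODE_TAG);
}
/* --- end example --- */

The RADIX_TREE_RETRY value defined above is just this tag applied to NULL, so a lookup that races with node freeing sees an "internal" entry that decodes to no node and knows to restart, as the reworked __radix_tree_lookup() later in this patch does.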
@@ -108,7 +142,7 @@ static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag)
108 root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT)); 142 root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT));
109} 143}
110 144
111static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag) 145static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag)
112{ 146{
113 root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT)); 147 root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT));
114} 148}
@@ -120,7 +154,12 @@ static inline void root_tag_clear_all(struct radix_tree_root *root)
120 154
121static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag) 155static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag)
122{ 156{
123 return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); 157 return (__force int)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT));
158}
159
160static inline unsigned root_tags_get(struct radix_tree_root *root)
161{
162 return (__force unsigned)root->gfp_mask >> __GFP_BITS_SHIFT;
124} 163}
125 164
126/* 165/*
@@ -129,7 +168,7 @@ static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag)
129 */ 168 */
130static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag) 169static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
131{ 170{
132 int idx; 171 unsigned idx;
133 for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { 172 for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
134 if (node->tags[tag][idx]) 173 if (node->tags[tag][idx])
135 return 1; 174 return 1;
@@ -173,38 +212,45 @@ radix_tree_find_next_bit(const unsigned long *addr,
173 return size; 212 return size;
174} 213}
175 214
176#if 0 215#ifndef __KERNEL__
177static void dump_node(void *slot, int height, int offset) 216static void dump_node(struct radix_tree_node *node, unsigned long index)
178{ 217{
179 struct radix_tree_node *node; 218 unsigned long i;
180 int i;
181 219
182 if (!slot) 220 pr_debug("radix node: %p offset %d tags %lx %lx %lx shift %d count %d parent %p\n",
183 return; 221 node, node->offset,
222 node->tags[0][0], node->tags[1][0], node->tags[2][0],
223 node->shift, node->count, node->parent);
184 224
185 if (height == 0) { 225 for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
186 pr_debug("radix entry %p offset %d\n", slot, offset); 226 unsigned long first = index | (i << node->shift);
187 return; 227 unsigned long last = first | ((1UL << node->shift) - 1);
228 void *entry = node->slots[i];
229 if (!entry)
230 continue;
231 if (is_sibling_entry(node, entry)) {
232 pr_debug("radix sblng %p offset %ld val %p indices %ld-%ld\n",
233 entry, i,
234 *(void **)entry_to_node(entry),
235 first, last);
236 } else if (!radix_tree_is_internal_node(entry)) {
237 pr_debug("radix entry %p offset %ld indices %ld-%ld\n",
238 entry, i, first, last);
239 } else {
240 dump_node(entry_to_node(entry), first);
241 }
188 } 242 }
189
190 node = indirect_to_ptr(slot);
191 pr_debug("radix node: %p offset %d tags %lx %lx %lx path %x count %d parent %p\n",
192 slot, offset, node->tags[0][0], node->tags[1][0],
193 node->tags[2][0], node->path, node->count, node->parent);
194
195 for (i = 0; i < RADIX_TREE_MAP_SIZE; i++)
196 dump_node(node->slots[i], height - 1, i);
197} 243}
198 244
199/* For debug */ 245/* For debug */
200static void radix_tree_dump(struct radix_tree_root *root) 246static void radix_tree_dump(struct radix_tree_root *root)
201{ 247{
202 pr_debug("radix root: %p height %d rnode %p tags %x\n", 248 pr_debug("radix root: %p rnode %p tags %x\n",
203 root, root->height, root->rnode, 249 root, root->rnode,
204 root->gfp_mask >> __GFP_BITS_SHIFT); 250 root->gfp_mask >> __GFP_BITS_SHIFT);
205 if (!radix_tree_is_indirect_ptr(root->rnode)) 251 if (!radix_tree_is_internal_node(root->rnode))
206 return; 252 return;
207 dump_node(root->rnode, root->height, 0); 253 dump_node(entry_to_node(root->rnode), 0);
208} 254}
209#endif 255#endif
210 256
@@ -219,9 +265,9 @@ radix_tree_node_alloc(struct radix_tree_root *root)
219 gfp_t gfp_mask = root_gfp_mask(root); 265 gfp_t gfp_mask = root_gfp_mask(root);
220 266
221 /* 267 /*
222 * Preload code isn't irq safe and it doesn't make sence to use 268 * Preload code isn't irq safe and it doesn't make sense to use
223 * preloading in the interrupt anyway as all the allocations have to 269 * preloading during an interrupt anyway as all the allocations have
224 * be atomic. So just do normal allocation when in interrupt. 270 * to be atomic. So just do normal allocation when in interrupt.
225 */ 271 */
226 if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) { 272 if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) {
227 struct radix_tree_preload *rtp; 273 struct radix_tree_preload *rtp;
@@ -257,7 +303,7 @@ radix_tree_node_alloc(struct radix_tree_root *root)
257 ret = kmem_cache_alloc(radix_tree_node_cachep, 303 ret = kmem_cache_alloc(radix_tree_node_cachep,
258 gfp_mask | __GFP_ACCOUNT); 304 gfp_mask | __GFP_ACCOUNT);
259out: 305out:
260 BUG_ON(radix_tree_is_indirect_ptr(ret)); 306 BUG_ON(radix_tree_is_internal_node(ret));
261 return ret; 307 return ret;
262} 308}
263 309
@@ -357,38 +403,58 @@ int radix_tree_maybe_preload(gfp_t gfp_mask)
357EXPORT_SYMBOL(radix_tree_maybe_preload); 403EXPORT_SYMBOL(radix_tree_maybe_preload);
358 404
359/* 405/*
360 * Return the maximum key which can be store into a 406 * The maximum index which can be stored in a radix tree
361 * radix tree with height HEIGHT.
362 */ 407 */
363static inline unsigned long radix_tree_maxindex(unsigned int height) 408static inline unsigned long shift_maxindex(unsigned int shift)
409{
410 return (RADIX_TREE_MAP_SIZE << shift) - 1;
411}
412
413static inline unsigned long node_maxindex(struct radix_tree_node *node)
414{
415 return shift_maxindex(node->shift);
416}
417
418static unsigned radix_tree_load_root(struct radix_tree_root *root,
419 struct radix_tree_node **nodep, unsigned long *maxindex)
364{ 420{
365 return height_to_maxindex[height]; 421 struct radix_tree_node *node = rcu_dereference_raw(root->rnode);
422
423 *nodep = node;
424
425 if (likely(radix_tree_is_internal_node(node))) {
426 node = entry_to_node(node);
427 *maxindex = node_maxindex(node);
428 return node->shift + RADIX_TREE_MAP_SHIFT;
429 }
430
431 *maxindex = 0;
432 return 0;
366} 433}
367 434
368/* 435/*
369 * Extend a radix tree so it can store key @index. 436 * Extend a radix tree so it can store key @index.
370 */ 437 */
371static int radix_tree_extend(struct radix_tree_root *root, 438static int radix_tree_extend(struct radix_tree_root *root,
372 unsigned long index, unsigned order) 439 unsigned long index, unsigned int shift)
373{ 440{
374 struct radix_tree_node *node;
375 struct radix_tree_node *slot; 441 struct radix_tree_node *slot;
376 unsigned int height; 442 unsigned int maxshift;
377 int tag; 443 int tag;
378 444
379 /* Figure out what the height should be. */ 445 /* Figure out what the shift should be. */
380 height = root->height + 1; 446 maxshift = shift;
381 while (index > radix_tree_maxindex(height)) 447 while (index > shift_maxindex(maxshift))
382 height++; 448 maxshift += RADIX_TREE_MAP_SHIFT;
383 449
384 if ((root->rnode == NULL) && (order == 0)) { 450 slot = root->rnode;
385 root->height = height; 451 if (!slot)
386 goto out; 452 goto out;
387 }
388 453
389 do { 454 do {
390 unsigned int newheight; 455 struct radix_tree_node *node = radix_tree_node_alloc(root);
391 if (!(node = radix_tree_node_alloc(root))) 456
457 if (!node)
392 return -ENOMEM; 458 return -ENOMEM;
393 459
394 /* Propagate the aggregated tag info into the new root */ 460 /* Propagate the aggregated tag info into the new root */
@@ -397,25 +463,20 @@ static int radix_tree_extend(struct radix_tree_root *root,
397 tag_set(node, tag, 0); 463 tag_set(node, tag, 0);
398 } 464 }
399 465
400 /* Increase the height. */ 466 BUG_ON(shift > BITS_PER_LONG);
401 newheight = root->height+1; 467 node->shift = shift;
402 BUG_ON(newheight & ~RADIX_TREE_HEIGHT_MASK); 468 node->offset = 0;
403 node->path = newheight;
404 node->count = 1; 469 node->count = 1;
405 node->parent = NULL; 470 node->parent = NULL;
406 slot = root->rnode; 471 if (radix_tree_is_internal_node(slot))
407 if (radix_tree_is_indirect_ptr(slot) && newheight > 1) { 472 entry_to_node(slot)->parent = node;
408 slot = indirect_to_ptr(slot);
409 slot->parent = node;
410 slot = ptr_to_indirect(slot);
411 }
412 node->slots[0] = slot; 473 node->slots[0] = slot;
413 node = ptr_to_indirect(node); 474 slot = node_to_entry(node);
414 rcu_assign_pointer(root->rnode, node); 475 rcu_assign_pointer(root->rnode, slot);
415 root->height = newheight; 476 shift += RADIX_TREE_MAP_SHIFT;
416 } while (height > root->height); 477 } while (shift <= maxshift);
417out: 478out:
418 return 0; 479 return maxshift + RADIX_TREE_MAP_SHIFT;
419} 480}
420 481
421/** 482/**
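To put numbers on the shift-based bookkeeping introduced here: with the usual RADIX_TREE_MAP_SHIFT of 6 (64 slots per node; it is 4 only under CONFIG_BASE_SMALL), shift_maxindex(0) = 64 - 1 = 63, shift_maxindex(6) = (64 << 6) - 1 = 4095, and shift_maxindex(12) = (64 << 12) - 1 = 262143. So inserting index 1000 into an empty tree lets radix_tree_extend() stop at maxshift 6 and the reworked __radix_tree_create() below build two levels (a shift-6 root over a shift-0 leaf), while index 5000 needs a third level with the root at shift 12.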
@@ -439,71 +500,70 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
439 unsigned order, struct radix_tree_node **nodep, 500 unsigned order, struct radix_tree_node **nodep,
440 void ***slotp) 501 void ***slotp)
441{ 502{
442 struct radix_tree_node *node = NULL, *slot; 503 struct radix_tree_node *node = NULL, *child;
443 unsigned int height, shift, offset; 504 void **slot = (void **)&root->rnode;
444 int error; 505 unsigned long maxindex;
506 unsigned int shift, offset = 0;
507 unsigned long max = index | ((1UL << order) - 1);
445 508
446 BUG_ON((0 < order) && (order < RADIX_TREE_MAP_SHIFT)); 509 shift = radix_tree_load_root(root, &child, &maxindex);
447 510
448 /* Make sure the tree is high enough. */ 511 /* Make sure the tree is high enough. */
449 if (index > radix_tree_maxindex(root->height)) { 512 if (max > maxindex) {
450 error = radix_tree_extend(root, index, order); 513 int error = radix_tree_extend(root, max, shift);
451 if (error) 514 if (error < 0)
452 return error; 515 return error;
516 shift = error;
517 child = root->rnode;
518 if (order == shift)
519 shift += RADIX_TREE_MAP_SHIFT;
453 } 520 }
454 521
455 slot = root->rnode;
456
457 height = root->height;
458 shift = height * RADIX_TREE_MAP_SHIFT;
459
460 offset = 0; /* uninitialised var warning */
461 while (shift > order) { 522 while (shift > order) {
462 if (slot == NULL) { 523 shift -= RADIX_TREE_MAP_SHIFT;
524 if (child == NULL) {
463 /* Have to add a child node. */ 525 /* Have to add a child node. */
464 if (!(slot = radix_tree_node_alloc(root))) 526 child = radix_tree_node_alloc(root);
527 if (!child)
465 return -ENOMEM; 528 return -ENOMEM;
466 slot->path = height; 529 child->shift = shift;
467 slot->parent = node; 530 child->offset = offset;
468 if (node) { 531 child->parent = node;
469 rcu_assign_pointer(node->slots[offset], 532 rcu_assign_pointer(*slot, node_to_entry(child));
470 ptr_to_indirect(slot)); 533 if (node)
471 node->count++; 534 node->count++;
472 slot->path |= offset << RADIX_TREE_HEIGHT_SHIFT; 535 } else if (!radix_tree_is_internal_node(child))
473 } else
474 rcu_assign_pointer(root->rnode,
475 ptr_to_indirect(slot));
476 } else if (!radix_tree_is_indirect_ptr(slot))
477 break; 536 break;
478 537
479 /* Go a level down */ 538 /* Go a level down */
480 height--; 539 node = entry_to_node(child);
481 shift -= RADIX_TREE_MAP_SHIFT; 540 offset = radix_tree_descend(node, &child, index);
482 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 541 slot = &node->slots[offset];
483 node = indirect_to_ptr(slot);
484 slot = node->slots[offset];
485 } 542 }
486 543
544#ifdef CONFIG_RADIX_TREE_MULTIORDER
487 /* Insert pointers to the canonical entry */ 545 /* Insert pointers to the canonical entry */
488 if ((shift - order) > 0) { 546 if (order > shift) {
489 int i, n = 1 << (shift - order); 547 unsigned i, n = 1 << (order - shift);
490 offset = offset & ~(n - 1); 548 offset = offset & ~(n - 1);
491 slot = ptr_to_indirect(&node->slots[offset]); 549 slot = &node->slots[offset];
550 child = node_to_entry(slot);
492 for (i = 0; i < n; i++) { 551 for (i = 0; i < n; i++) {
493 if (node->slots[offset + i]) 552 if (slot[i])
494 return -EEXIST; 553 return -EEXIST;
495 } 554 }
496 555
497 for (i = 1; i < n; i++) { 556 for (i = 1; i < n; i++) {
498 rcu_assign_pointer(node->slots[offset + i], slot); 557 rcu_assign_pointer(slot[i], child);
499 node->count++; 558 node->count++;
500 } 559 }
501 } 560 }
561#endif
502 562
503 if (nodep) 563 if (nodep)
504 *nodep = node; 564 *nodep = node;
505 if (slotp) 565 if (slotp)
506 *slotp = node ? node->slots + offset : (void **)&root->rnode; 566 *slotp = slot;
507 return 0; 567 return 0;
508} 568}
509 569
@@ -523,7 +583,7 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
523 void **slot; 583 void **slot;
524 int error; 584 int error;
525 585
526 BUG_ON(radix_tree_is_indirect_ptr(item)); 586 BUG_ON(radix_tree_is_internal_node(item));
527 587
528 error = __radix_tree_create(root, index, order, &node, &slot); 588 error = __radix_tree_create(root, index, order, &node, &slot);
529 if (error) 589 if (error)
@@ -533,12 +593,13 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
533 rcu_assign_pointer(*slot, item); 593 rcu_assign_pointer(*slot, item);
534 594
535 if (node) { 595 if (node) {
596 unsigned offset = get_slot_offset(node, slot);
536 node->count++; 597 node->count++;
537 BUG_ON(tag_get(node, 0, index & RADIX_TREE_MAP_MASK)); 598 BUG_ON(tag_get(node, 0, offset));
538 BUG_ON(tag_get(node, 1, index & RADIX_TREE_MAP_MASK)); 599 BUG_ON(tag_get(node, 1, offset));
600 BUG_ON(tag_get(node, 2, offset));
539 } else { 601 } else {
540 BUG_ON(root_tag_get(root, 0)); 602 BUG_ON(root_tags_get(root));
541 BUG_ON(root_tag_get(root, 1));
542 } 603 }
543 604
544 return 0; 605 return 0;
@@ -563,44 +624,25 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index,
563 struct radix_tree_node **nodep, void ***slotp) 624 struct radix_tree_node **nodep, void ***slotp)
564{ 625{
565 struct radix_tree_node *node, *parent; 626 struct radix_tree_node *node, *parent;
566 unsigned int height, shift; 627 unsigned long maxindex;
567 void **slot; 628 void **slot;
568 629
569 node = rcu_dereference_raw(root->rnode); 630 restart:
570 if (node == NULL) 631 parent = NULL;
632 slot = (void **)&root->rnode;
633 radix_tree_load_root(root, &node, &maxindex);
634 if (index > maxindex)
571 return NULL; 635 return NULL;
572 636
573 if (!radix_tree_is_indirect_ptr(node)) { 637 while (radix_tree_is_internal_node(node)) {
574 if (index > 0) 638 unsigned offset;
575 return NULL;
576 639
577 if (nodep) 640 if (node == RADIX_TREE_RETRY)
578 *nodep = NULL; 641 goto restart;
579 if (slotp) 642 parent = entry_to_node(node);
580 *slotp = (void **)&root->rnode; 643 offset = radix_tree_descend(parent, &node, index);
581 return node; 644 slot = parent->slots + offset;
582 } 645 }
583 node = indirect_to_ptr(node);
584
585 height = node->path & RADIX_TREE_HEIGHT_MASK;
586 if (index > radix_tree_maxindex(height))
587 return NULL;
588
589 shift = (height-1) * RADIX_TREE_MAP_SHIFT;
590
591 do {
592 parent = node;
593 slot = node->slots + ((index >> shift) & RADIX_TREE_MAP_MASK);
594 node = rcu_dereference_raw(*slot);
595 if (node == NULL)
596 return NULL;
597 if (!radix_tree_is_indirect_ptr(node))
598 break;
599 node = indirect_to_ptr(node);
600
601 shift -= RADIX_TREE_MAP_SHIFT;
602 height--;
603 } while (height > 0);
604 646
605 if (nodep) 647 if (nodep)
606 *nodep = parent; 648 *nodep = parent;
@@ -654,59 +696,72 @@ EXPORT_SYMBOL(radix_tree_lookup);
654 * radix_tree_tag_set - set a tag on a radix tree node 696 * radix_tree_tag_set - set a tag on a radix tree node
655 * @root: radix tree root 697 * @root: radix tree root
656 * @index: index key 698 * @index: index key
657 * @tag: tag index 699 * @tag: tag index
658 * 700 *
659 * Set the search tag (which must be < RADIX_TREE_MAX_TAGS) 701 * Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
660 * corresponding to @index in the radix tree. From 702 * corresponding to @index in the radix tree. From
661 * the root all the way down to the leaf node. 703 * the root all the way down to the leaf node.
662 * 704 *
663 * Returns the address of the tagged item. Setting a tag on a not-present 705 * Returns the address of the tagged item. Setting a tag on a not-present
664 * item is a bug. 706 * item is a bug.
665 */ 707 */
666void *radix_tree_tag_set(struct radix_tree_root *root, 708void *radix_tree_tag_set(struct radix_tree_root *root,
667 unsigned long index, unsigned int tag) 709 unsigned long index, unsigned int tag)
668{ 710{
669 unsigned int height, shift; 711 struct radix_tree_node *node, *parent;
670 struct radix_tree_node *slot; 712 unsigned long maxindex;
671 713
672 height = root->height; 714 radix_tree_load_root(root, &node, &maxindex);
673 BUG_ON(index > radix_tree_maxindex(height)); 715 BUG_ON(index > maxindex);
674 716
675 slot = indirect_to_ptr(root->rnode); 717 while (radix_tree_is_internal_node(node)) {
676 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 718 unsigned offset;
677 719
678 while (height > 0) { 720 parent = entry_to_node(node);
679 int offset; 721 offset = radix_tree_descend(parent, &node, index);
722 BUG_ON(!node);
680 723
681 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 724 if (!tag_get(parent, tag, offset))
682 if (!tag_get(slot, tag, offset)) 725 tag_set(parent, tag, offset);
683 tag_set(slot, tag, offset);
684 slot = slot->slots[offset];
685 BUG_ON(slot == NULL);
686 if (!radix_tree_is_indirect_ptr(slot))
687 break;
688 slot = indirect_to_ptr(slot);
689 shift -= RADIX_TREE_MAP_SHIFT;
690 height--;
691 } 726 }
692 727
693 /* set the root's tag bit */ 728 /* set the root's tag bit */
694 if (slot && !root_tag_get(root, tag)) 729 if (!root_tag_get(root, tag))
695 root_tag_set(root, tag); 730 root_tag_set(root, tag);
696 731
697 return slot; 732 return node;
698} 733}
699EXPORT_SYMBOL(radix_tree_tag_set); 734EXPORT_SYMBOL(radix_tree_tag_set);
700 735
736static void node_tag_clear(struct radix_tree_root *root,
737 struct radix_tree_node *node,
738 unsigned int tag, unsigned int offset)
739{
740 while (node) {
741 if (!tag_get(node, tag, offset))
742 return;
743 tag_clear(node, tag, offset);
744 if (any_tag_set(node, tag))
745 return;
746
747 offset = node->offset;
748 node = node->parent;
749 }
750
751 /* clear the root's tag bit */
752 if (root_tag_get(root, tag))
753 root_tag_clear(root, tag);
754}
755
701/** 756/**
702 * radix_tree_tag_clear - clear a tag on a radix tree node 757 * radix_tree_tag_clear - clear a tag on a radix tree node
703 * @root: radix tree root 758 * @root: radix tree root
704 * @index: index key 759 * @index: index key
705 * @tag: tag index 760 * @tag: tag index
706 * 761 *
707 * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS) 762 * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
708 * corresponding to @index in the radix tree. If 763 * corresponding to @index in the radix tree. If this causes
709 * this causes the leaf node to have no tags set then clear the tag in the 764 * the leaf node to have no tags set then clear the tag in the
710 * next-to-leaf node, etc. 765 * next-to-leaf node, etc.
711 * 766 *
712 * Returns the address of the tagged item on success, else NULL. ie: 767 * Returns the address of the tagged item on success, else NULL. ie:
@@ -715,52 +770,25 @@ EXPORT_SYMBOL(radix_tree_tag_set);
715void *radix_tree_tag_clear(struct radix_tree_root *root, 770void *radix_tree_tag_clear(struct radix_tree_root *root,
716 unsigned long index, unsigned int tag) 771 unsigned long index, unsigned int tag)
717{ 772{
718 struct radix_tree_node *node = NULL; 773 struct radix_tree_node *node, *parent;
719 struct radix_tree_node *slot = NULL; 774 unsigned long maxindex;
720 unsigned int height, shift;
721 int uninitialized_var(offset); 775 int uninitialized_var(offset);
722 776
723 height = root->height; 777 radix_tree_load_root(root, &node, &maxindex);
724 if (index > radix_tree_maxindex(height)) 778 if (index > maxindex)
725 goto out; 779 return NULL;
726
727 shift = height * RADIX_TREE_MAP_SHIFT;
728 slot = root->rnode;
729
730 while (shift) {
731 if (slot == NULL)
732 goto out;
733 if (!radix_tree_is_indirect_ptr(slot))
734 break;
735 slot = indirect_to_ptr(slot);
736
737 shift -= RADIX_TREE_MAP_SHIFT;
738 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
739 node = slot;
740 slot = slot->slots[offset];
741 }
742
743 if (slot == NULL)
744 goto out;
745 780
746 while (node) { 781 parent = NULL;
747 if (!tag_get(node, tag, offset))
748 goto out;
749 tag_clear(node, tag, offset);
750 if (any_tag_set(node, tag))
751 goto out;
752 782
753 index >>= RADIX_TREE_MAP_SHIFT; 783 while (radix_tree_is_internal_node(node)) {
754 offset = index & RADIX_TREE_MAP_MASK; 784 parent = entry_to_node(node);
755 node = node->parent; 785 offset = radix_tree_descend(parent, &node, index);
756 } 786 }
757 787
758 /* clear the root's tag bit */ 788 if (node)
759 if (root_tag_get(root, tag)) 789 node_tag_clear(root, parent, tag, offset);
760 root_tag_clear(root, tag);
761 790
762out: 791 return node;
763 return slot;
764} 792}
765EXPORT_SYMBOL(radix_tree_tag_clear); 793EXPORT_SYMBOL(radix_tree_tag_clear);
766 794
@@ -768,7 +796,7 @@ EXPORT_SYMBOL(radix_tree_tag_clear);
768 * radix_tree_tag_get - get a tag on a radix tree node 796 * radix_tree_tag_get - get a tag on a radix tree node
769 * @root: radix tree root 797 * @root: radix tree root
770 * @index: index key 798 * @index: index key
771 * @tag: tag index (< RADIX_TREE_MAX_TAGS) 799 * @tag: tag index (< RADIX_TREE_MAX_TAGS)
772 * 800 *
773 * Return values: 801 * Return values:
774 * 802 *
@@ -782,48 +810,44 @@ EXPORT_SYMBOL(radix_tree_tag_clear);
782int radix_tree_tag_get(struct radix_tree_root *root, 810int radix_tree_tag_get(struct radix_tree_root *root,
783 unsigned long index, unsigned int tag) 811 unsigned long index, unsigned int tag)
784{ 812{
785 unsigned int height, shift; 813 struct radix_tree_node *node, *parent;
786 struct radix_tree_node *node; 814 unsigned long maxindex;
787 815
788 /* check the root's tag bit */
789 if (!root_tag_get(root, tag)) 816 if (!root_tag_get(root, tag))
790 return 0; 817 return 0;
791 818
792 node = rcu_dereference_raw(root->rnode); 819 radix_tree_load_root(root, &node, &maxindex);
793 if (node == NULL) 820 if (index > maxindex)
794 return 0; 821 return 0;
795 822 if (node == NULL)
796 if (!radix_tree_is_indirect_ptr(node))
797 return (index == 0);
798 node = indirect_to_ptr(node);
799
800 height = node->path & RADIX_TREE_HEIGHT_MASK;
801 if (index > radix_tree_maxindex(height))
802 return 0; 823 return 0;
803 824
804 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 825 while (radix_tree_is_internal_node(node)) {
826 unsigned offset;
805 827
806 for ( ; ; ) { 828 parent = entry_to_node(node);
807 int offset; 829 offset = radix_tree_descend(parent, &node, index);
808 830
809 if (node == NULL) 831 if (!node)
810 return 0; 832 return 0;
811 node = indirect_to_ptr(node); 833 if (!tag_get(parent, tag, offset))
812
813 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
814 if (!tag_get(node, tag, offset))
815 return 0; 834 return 0;
816 if (height == 1) 835 if (node == RADIX_TREE_RETRY)
817 return 1; 836 break;
818 node = rcu_dereference_raw(node->slots[offset]);
819 if (!radix_tree_is_indirect_ptr(node))
820 return 1;
821 shift -= RADIX_TREE_MAP_SHIFT;
822 height--;
823 } 837 }
838
839 return 1;
824} 840}
825EXPORT_SYMBOL(radix_tree_tag_get); 841EXPORT_SYMBOL(radix_tree_tag_get);
826 842
843static inline void __set_iter_shift(struct radix_tree_iter *iter,
844 unsigned int shift)
845{
846#ifdef CONFIG_RADIX_TREE_MULTIORDER
847 iter->shift = shift;
848#endif
849}
850
827/** 851/**
828 * radix_tree_next_chunk - find next chunk of slots for iteration 852 * radix_tree_next_chunk - find next chunk of slots for iteration
829 * 853 *
@@ -835,9 +859,9 @@ EXPORT_SYMBOL(radix_tree_tag_get);
835void **radix_tree_next_chunk(struct radix_tree_root *root, 859void **radix_tree_next_chunk(struct radix_tree_root *root,
836 struct radix_tree_iter *iter, unsigned flags) 860 struct radix_tree_iter *iter, unsigned flags)
837{ 861{
838 unsigned shift, tag = flags & RADIX_TREE_ITER_TAG_MASK; 862 unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
839 struct radix_tree_node *rnode, *node; 863 struct radix_tree_node *node, *child;
840 unsigned long index, offset, height; 864 unsigned long index, offset, maxindex;
841 865
842 if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag)) 866 if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
843 return NULL; 867 return NULL;
@@ -855,33 +879,28 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
855 if (!index && iter->index) 879 if (!index && iter->index)
856 return NULL; 880 return NULL;
857 881
858 rnode = rcu_dereference_raw(root->rnode); 882 restart:
859 if (radix_tree_is_indirect_ptr(rnode)) { 883 radix_tree_load_root(root, &child, &maxindex);
860 rnode = indirect_to_ptr(rnode); 884 if (index > maxindex)
861 } else if (rnode && !index) { 885 return NULL;
886 if (!child)
887 return NULL;
888
889 if (!radix_tree_is_internal_node(child)) {
862 /* Single-slot tree */ 890 /* Single-slot tree */
863 iter->index = 0; 891 iter->index = index;
864 iter->next_index = 1; 892 iter->next_index = maxindex + 1;
865 iter->tags = 1; 893 iter->tags = 1;
894 __set_iter_shift(iter, 0);
866 return (void **)&root->rnode; 895 return (void **)&root->rnode;
867 } else 896 }
868 return NULL;
869
870restart:
871 height = rnode->path & RADIX_TREE_HEIGHT_MASK;
872 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
873 offset = index >> shift;
874 897
875 /* Index outside of the tree */ 898 do {
876 if (offset >= RADIX_TREE_MAP_SIZE) 899 node = entry_to_node(child);
877 return NULL; 900 offset = radix_tree_descend(node, &child, index);
878 901
879 node = rnode;
880 while (1) {
881 struct radix_tree_node *slot;
882 if ((flags & RADIX_TREE_ITER_TAGGED) ? 902 if ((flags & RADIX_TREE_ITER_TAGGED) ?
883 !test_bit(offset, node->tags[tag]) : 903 !tag_get(node, tag, offset) : !child) {
884 !node->slots[offset]) {
885 /* Hole detected */ 904 /* Hole detected */
886 if (flags & RADIX_TREE_ITER_CONTIG) 905 if (flags & RADIX_TREE_ITER_CONTIG)
887 return NULL; 906 return NULL;
@@ -893,35 +912,30 @@ restart:
893 offset + 1); 912 offset + 1);
894 else 913 else
895 while (++offset < RADIX_TREE_MAP_SIZE) { 914 while (++offset < RADIX_TREE_MAP_SIZE) {
896 if (node->slots[offset]) 915 void *slot = node->slots[offset];
916 if (is_sibling_entry(node, slot))
917 continue;
918 if (slot)
897 break; 919 break;
898 } 920 }
899 index &= ~((RADIX_TREE_MAP_SIZE << shift) - 1); 921 index &= ~node_maxindex(node);
900 index += offset << shift; 922 index += offset << node->shift;
901 /* Overflow after ~0UL */ 923 /* Overflow after ~0UL */
902 if (!index) 924 if (!index)
903 return NULL; 925 return NULL;
904 if (offset == RADIX_TREE_MAP_SIZE) 926 if (offset == RADIX_TREE_MAP_SIZE)
905 goto restart; 927 goto restart;
928 child = rcu_dereference_raw(node->slots[offset]);
906 } 929 }
907 930
908 /* This is leaf-node */ 931 if ((child == NULL) || (child == RADIX_TREE_RETRY))
909 if (!shift)
910 break;
911
912 slot = rcu_dereference_raw(node->slots[offset]);
913 if (slot == NULL)
914 goto restart; 932 goto restart;
915 if (!radix_tree_is_indirect_ptr(slot)) 933 } while (radix_tree_is_internal_node(child));
916 break;
917 node = indirect_to_ptr(slot);
918 shift -= RADIX_TREE_MAP_SHIFT;
919 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
920 }
921 934
922 /* Update the iterator state */ 935 /* Update the iterator state */
923 iter->index = index; 936 iter->index = (index &~ node_maxindex(node)) | (offset << node->shift);
924 iter->next_index = (index | RADIX_TREE_MAP_MASK) + 1; 937 iter->next_index = (index | node_maxindex(node)) + 1;
938 __set_iter_shift(iter, node->shift);
925 939
926 /* Construct iter->tags bit-mask from node->tags[tag] array */ 940 /* Construct iter->tags bit-mask from node->tags[tag] array */
927 if (flags & RADIX_TREE_ITER_TAGGED) { 941 if (flags & RADIX_TREE_ITER_TAGGED) {
@@ -967,7 +981,7 @@ EXPORT_SYMBOL(radix_tree_next_chunk);
967 * set is outside the range we are scanning. This reults in dangling tags and 981 * set is outside the range we are scanning. This reults in dangling tags and
968 * can lead to problems with later tag operations (e.g. livelocks on lookups). 982 * can lead to problems with later tag operations (e.g. livelocks on lookups).
969 * 983 *
970 * The function returns number of leaves where the tag was set and sets 984 * The function returns the number of leaves where the tag was set and sets
971 * *first_indexp to the first unscanned index. 985 * *first_indexp to the first unscanned index.
972 * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must 986 * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must
973 * be prepared to handle that. 987 * be prepared to handle that.
@@ -977,14 +991,13 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
977 unsigned long nr_to_tag, 991 unsigned long nr_to_tag,
978 unsigned int iftag, unsigned int settag) 992 unsigned int iftag, unsigned int settag)
979{ 993{
980 unsigned int height = root->height; 994 struct radix_tree_node *parent, *node, *child;
981 struct radix_tree_node *node = NULL; 995 unsigned long maxindex;
982 struct radix_tree_node *slot;
983 unsigned int shift;
984 unsigned long tagged = 0; 996 unsigned long tagged = 0;
985 unsigned long index = *first_indexp; 997 unsigned long index = *first_indexp;
986 998
987 last_index = min(last_index, radix_tree_maxindex(height)); 999 radix_tree_load_root(root, &child, &maxindex);
1000 last_index = min(last_index, maxindex);
988 if (index > last_index) 1001 if (index > last_index)
989 return 0; 1002 return 0;
990 if (!nr_to_tag) 1003 if (!nr_to_tag)
@@ -993,80 +1006,62 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
993 *first_indexp = last_index + 1; 1006 *first_indexp = last_index + 1;
994 return 0; 1007 return 0;
995 } 1008 }
996 if (height == 0) { 1009 if (!radix_tree_is_internal_node(child)) {
997 *first_indexp = last_index + 1; 1010 *first_indexp = last_index + 1;
998 root_tag_set(root, settag); 1011 root_tag_set(root, settag);
999 return 1; 1012 return 1;
1000 } 1013 }
1001 1014
1002 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 1015 node = entry_to_node(child);
1003 slot = indirect_to_ptr(root->rnode);
1004 1016
1005 for (;;) { 1017 for (;;) {
1006 unsigned long upindex; 1018 unsigned offset = radix_tree_descend(node, &child, index);
1007 int offset; 1019 if (!child)
1008
1009 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
1010 if (!slot->slots[offset])
1011 goto next; 1020 goto next;
1012 if (!tag_get(slot, iftag, offset)) 1021 if (!tag_get(node, iftag, offset))
1013 goto next; 1022 goto next;
1014 if (shift) { 1023 /* Sibling slots never have tags set on them */
1015 node = slot; 1024 if (radix_tree_is_internal_node(child)) {
1016 slot = slot->slots[offset]; 1025 node = entry_to_node(child);
1017 if (radix_tree_is_indirect_ptr(slot)) { 1026 continue;
1018 slot = indirect_to_ptr(slot);
1019 shift -= RADIX_TREE_MAP_SHIFT;
1020 continue;
1021 } else {
1022 slot = node;
1023 node = node->parent;
1024 }
1025 } 1027 }
1026 1028
1027 /* tag the leaf */ 1029 /* tag the leaf */
1028 tagged += 1 << shift; 1030 tagged++;
1029 tag_set(slot, settag, offset); 1031 tag_set(node, settag, offset);
1030 1032
1031 /* walk back up the path tagging interior nodes */ 1033 /* walk back up the path tagging interior nodes */
1032 upindex = index; 1034 parent = node;
1033 while (node) { 1035 for (;;) {
1034 upindex >>= RADIX_TREE_MAP_SHIFT; 1036 offset = parent->offset;
1035 offset = upindex & RADIX_TREE_MAP_MASK; 1037 parent = parent->parent;
1036 1038 if (!parent)
1039 break;
1037 /* stop if we find a node with the tag already set */ 1040 /* stop if we find a node with the tag already set */
1038 if (tag_get(node, settag, offset)) 1041 if (tag_get(parent, settag, offset))
1039 break; 1042 break;
1040 tag_set(node, settag, offset); 1043 tag_set(parent, settag, offset);
1041 node = node->parent;
1042 } 1044 }
1043 1045 next:
1044 /* 1046 /* Go to next entry in node */
1045 * Small optimization: now clear that node pointer. 1047 index = ((index >> node->shift) + 1) << node->shift;
1046 * Since all of this slot's ancestors now have the tag set
1047 * from setting it above, we have no further need to walk
1048 * back up the tree setting tags, until we update slot to
1049 * point to another radix_tree_node.
1050 */
1051 node = NULL;
1052
1053next:
1054 /* Go to next item at level determined by 'shift' */
1055 index = ((index >> shift) + 1) << shift;
1056 /* Overflow can happen when last_index is ~0UL... */ 1048 /* Overflow can happen when last_index is ~0UL... */
1057 if (index > last_index || !index) 1049 if (index > last_index || !index)
1058 break; 1050 break;
1059 if (tagged >= nr_to_tag) 1051 offset = (index >> node->shift) & RADIX_TREE_MAP_MASK;
1060 break; 1052 while (offset == 0) {
1061 while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) {
1062 /* 1053 /*
1063 * We've fully scanned this node. Go up. Because 1054 * We've fully scanned this node. Go up. Because
1064 * last_index is guaranteed to be in the tree, what 1055 * last_index is guaranteed to be in the tree, what
1065 * we do below cannot wander astray. 1056 * we do below cannot wander astray.
1066 */ 1057 */
1067 slot = slot->parent; 1058 node = node->parent;
1068 shift += RADIX_TREE_MAP_SHIFT; 1059 offset = (index >> node->shift) & RADIX_TREE_MAP_MASK;
1069 } 1060 }
1061 if (is_sibling_entry(node, node->slots[offset]))
1062 goto next;
1063 if (tagged >= nr_to_tag)
1064 break;
1070 } 1065 }
1071 /* 1066 /*
1072 * We need not to tag the root tag if there is no tag which is set with 1067 * We need not to tag the root tag if there is no tag which is set with
@@ -1095,9 +1090,10 @@ EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);
1095 * 1090 *
1096 * Like radix_tree_lookup, radix_tree_gang_lookup may be called under 1091 * Like radix_tree_lookup, radix_tree_gang_lookup may be called under
1097 * rcu_read_lock. In this case, rather than the returned results being 1092 * rcu_read_lock. In this case, rather than the returned results being
1098 * an atomic snapshot of the tree at a single point in time, the semantics 1093 * an atomic snapshot of the tree at a single point in time, the
1099 * of an RCU protected gang lookup are as though multiple radix_tree_lookups 1094 * semantics of an RCU protected gang lookup are as though multiple
1100 * have been issued in individual locks, and results stored in 'results'. 1095 * radix_tree_lookups have been issued in individual locks, and results
1096 * stored in 'results'.
1101 */ 1097 */
1102unsigned int 1098unsigned int
1103radix_tree_gang_lookup(struct radix_tree_root *root, void **results, 1099radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
@@ -1114,7 +1110,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
1114 results[ret] = rcu_dereference_raw(*slot); 1110 results[ret] = rcu_dereference_raw(*slot);
1115 if (!results[ret]) 1111 if (!results[ret])
1116 continue; 1112 continue;
1117 if (radix_tree_is_indirect_ptr(results[ret])) { 1113 if (radix_tree_is_internal_node(results[ret])) {
1118 slot = radix_tree_iter_retry(&iter); 1114 slot = radix_tree_iter_retry(&iter);
1119 continue; 1115 continue;
1120 } 1116 }
@@ -1197,7 +1193,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
1197 results[ret] = rcu_dereference_raw(*slot); 1193 results[ret] = rcu_dereference_raw(*slot);
1198 if (!results[ret]) 1194 if (!results[ret])
1199 continue; 1195 continue;
1200 if (radix_tree_is_indirect_ptr(results[ret])) { 1196 if (radix_tree_is_internal_node(results[ret])) {
1201 slot = radix_tree_iter_retry(&iter); 1197 slot = radix_tree_iter_retry(&iter);
1202 continue; 1198 continue;
1203 } 1199 }
@@ -1247,58 +1243,48 @@ EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
1247#if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP) 1243#if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP)
1248#include <linux/sched.h> /* for cond_resched() */ 1244#include <linux/sched.h> /* for cond_resched() */
1249 1245
1246struct locate_info {
1247 unsigned long found_index;
1248 bool stop;
1249};
1250
1250/* 1251/*
1251 * This linear search is at present only useful to shmem_unuse_inode(). 1252 * This linear search is at present only useful to shmem_unuse_inode().
1252 */ 1253 */
1253static unsigned long __locate(struct radix_tree_node *slot, void *item, 1254static unsigned long __locate(struct radix_tree_node *slot, void *item,
1254 unsigned long index, unsigned long *found_index) 1255 unsigned long index, struct locate_info *info)
1255{ 1256{
1256 unsigned int shift, height;
1257 unsigned long i; 1257 unsigned long i;
1258 1258
1259 height = slot->path & RADIX_TREE_HEIGHT_MASK; 1259 do {
1260 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 1260 unsigned int shift = slot->shift;
1261 1261
1262 for ( ; height > 1; height--) { 1262 for (i = (index >> shift) & RADIX_TREE_MAP_MASK;
1263 i = (index >> shift) & RADIX_TREE_MAP_MASK; 1263 i < RADIX_TREE_MAP_SIZE;
1264 for (;;) { 1264 i++, index += (1UL << shift)) {
1265 if (slot->slots[i] != NULL) 1265 struct radix_tree_node *node =
1266 break; 1266 rcu_dereference_raw(slot->slots[i]);
1267 index &= ~((1UL << shift) - 1); 1267 if (node == RADIX_TREE_RETRY)
1268 index += 1UL << shift;
1269 if (index == 0)
1270 goto out; /* 32-bit wraparound */
1271 i++;
1272 if (i == RADIX_TREE_MAP_SIZE)
1273 goto out; 1268 goto out;
1274 } 1269 if (!radix_tree_is_internal_node(node)) {
1275 1270 if (node == item) {
1276 slot = rcu_dereference_raw(slot->slots[i]); 1271 info->found_index = index;
1277 if (slot == NULL) 1272 info->stop = true;
1278 goto out; 1273 goto out;
1279 if (!radix_tree_is_indirect_ptr(slot)) { 1274 }
1280 if (slot == item) { 1275 continue;
1281 *found_index = index + i;
1282 index = 0;
1283 } else {
1284 index += shift;
1285 } 1276 }
1286 goto out; 1277 node = entry_to_node(node);
1278 if (is_sibling_entry(slot, node))
1279 continue;
1280 slot = node;
1281 break;
1287 } 1282 }
1288 slot = indirect_to_ptr(slot); 1283 } while (i < RADIX_TREE_MAP_SIZE);
1289 shift -= RADIX_TREE_MAP_SHIFT;
1290 }
1291 1284
1292 /* Bottom level: check items */
1293 for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
1294 if (slot->slots[i] == item) {
1295 *found_index = index + i;
1296 index = 0;
1297 goto out;
1298 }
1299 }
1300 index += RADIX_TREE_MAP_SIZE;
1301out: 1285out:
1286 if ((index == 0) && (i == RADIX_TREE_MAP_SIZE))
1287 info->stop = true;
1302 return index; 1288 return index;
1303} 1289}
1304 1290
@@ -1316,32 +1302,35 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
1316 struct radix_tree_node *node; 1302 struct radix_tree_node *node;
1317 unsigned long max_index; 1303 unsigned long max_index;
1318 unsigned long cur_index = 0; 1304 unsigned long cur_index = 0;
1319 unsigned long found_index = -1; 1305 struct locate_info info = {
1306 .found_index = -1,
1307 .stop = false,
1308 };
1320 1309
1321 do { 1310 do {
1322 rcu_read_lock(); 1311 rcu_read_lock();
1323 node = rcu_dereference_raw(root->rnode); 1312 node = rcu_dereference_raw(root->rnode);
1324 if (!radix_tree_is_indirect_ptr(node)) { 1313 if (!radix_tree_is_internal_node(node)) {
1325 rcu_read_unlock(); 1314 rcu_read_unlock();
1326 if (node == item) 1315 if (node == item)
1327 found_index = 0; 1316 info.found_index = 0;
1328 break; 1317 break;
1329 } 1318 }
1330 1319
1331 node = indirect_to_ptr(node); 1320 node = entry_to_node(node);
1332 max_index = radix_tree_maxindex(node->path & 1321
1333 RADIX_TREE_HEIGHT_MASK); 1322 max_index = node_maxindex(node);
1334 if (cur_index > max_index) { 1323 if (cur_index > max_index) {
1335 rcu_read_unlock(); 1324 rcu_read_unlock();
1336 break; 1325 break;
1337 } 1326 }
1338 1327
1339 cur_index = __locate(node, item, cur_index, &found_index); 1328 cur_index = __locate(node, item, cur_index, &info);
1340 rcu_read_unlock(); 1329 rcu_read_unlock();
1341 cond_resched(); 1330 cond_resched();
1342 } while (cur_index != 0 && cur_index <= max_index); 1331 } while (!info.stop && cur_index <= max_index);
1343 1332
1344 return found_index; 1333 return info.found_index;
1345} 1334}
1346#else 1335#else
1347unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) 1336unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
@@ -1351,47 +1340,45 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
1351#endif /* CONFIG_SHMEM && CONFIG_SWAP */ 1340#endif /* CONFIG_SHMEM && CONFIG_SWAP */
1352 1341
1353/** 1342/**
1354 * radix_tree_shrink - shrink height of a radix tree to minimal 1343 * radix_tree_shrink - shrink radix tree to minimum height
1355 * @root radix tree root 1344 * @root radix tree root
1356 */ 1345 */
1357static inline void radix_tree_shrink(struct radix_tree_root *root) 1346static inline bool radix_tree_shrink(struct radix_tree_root *root)
1358{ 1347{
1359 /* try to shrink tree height */ 1348 bool shrunk = false;
1360 while (root->height > 0) { 1349
1361 struct radix_tree_node *to_free = root->rnode; 1350 for (;;) {
1362 struct radix_tree_node *slot; 1351 struct radix_tree_node *node = root->rnode;
1352 struct radix_tree_node *child;
1363 1353
1364 BUG_ON(!radix_tree_is_indirect_ptr(to_free)); 1354 if (!radix_tree_is_internal_node(node))
1365 to_free = indirect_to_ptr(to_free); 1355 break;
1356 node = entry_to_node(node);
1366 1357
1367 /* 1358 /*
1368 * The candidate node has more than one child, or its child 1359 * The candidate node has more than one child, or its child
1369 * is not at the leftmost slot, or it is a multiorder entry, 1360 * is not at the leftmost slot, or the child is a multiorder
1370 * we cannot shrink. 1361 * entry, we cannot shrink.
1371 */ 1362 */
1372 if (to_free->count != 1) 1363 if (node->count != 1)
1373 break; 1364 break;
1374 slot = to_free->slots[0]; 1365 child = node->slots[0];
1375 if (!slot) 1366 if (!child)
1376 break; 1367 break;
1368 if (!radix_tree_is_internal_node(child) && node->shift)
1369 break;
1370
1371 if (radix_tree_is_internal_node(child))
1372 entry_to_node(child)->parent = NULL;
1377 1373
1378 /* 1374 /*
1379 * We don't need rcu_assign_pointer(), since we are simply 1375 * We don't need rcu_assign_pointer(), since we are simply
1380 * moving the node from one part of the tree to another: if it 1376 * moving the node from one part of the tree to another: if it
1381 * was safe to dereference the old pointer to it 1377 * was safe to dereference the old pointer to it
1382 * (to_free->slots[0]), it will be safe to dereference the new 1378 * (node->slots[0]), it will be safe to dereference the new
1383 * one (root->rnode) as far as dependent read barriers go. 1379 * one (root->rnode) as far as dependent read barriers go.
1384 */ 1380 */
1385 if (root->height > 1) { 1381 root->rnode = child;
1386 if (!radix_tree_is_indirect_ptr(slot))
1387 break;
1388
1389 slot = indirect_to_ptr(slot);
1390 slot->parent = NULL;
1391 slot = ptr_to_indirect(slot);
1392 }
1393 root->rnode = slot;
1394 root->height--;
1395 1382
1396 /* 1383 /*
1397 * We have a dilemma here. The node's slot[0] must not be 1384 * We have a dilemma here. The node's slot[0] must not be
@@ -1403,7 +1390,7 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
1403 * their slot to become empty sooner or later. 1390 * their slot to become empty sooner or later.
1404 * 1391 *
1405 * For example, lockless pagecache will look up a slot, deref 1392 * For example, lockless pagecache will look up a slot, deref
1406 * the page pointer, and if the page is 0 refcount it means it 1393 * the page pointer, and if the page has 0 refcount it means it
1407 * was concurrently deleted from pagecache so try the deref 1394 * was concurrently deleted from pagecache so try the deref
1408 * again. Fortunately there is already a requirement for logic 1395 * again. Fortunately there is already a requirement for logic
1409 * to retry the entire slot lookup -- the indirect pointer 1396 * to retry the entire slot lookup -- the indirect pointer
@@ -1411,12 +1398,14 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
1411 * also results in a stale slot). So tag the slot as indirect 1398 * also results in a stale slot). So tag the slot as indirect
1412 * to force callers to retry. 1399 * to force callers to retry.
1413 */ 1400 */
1414 if (root->height == 0) 1401 if (!radix_tree_is_internal_node(child))
1415 *((unsigned long *)&to_free->slots[0]) |= 1402 node->slots[0] = RADIX_TREE_RETRY;
1416 RADIX_TREE_INDIRECT_PTR;
1417 1403
1418 radix_tree_node_free(to_free); 1404 radix_tree_node_free(node);
1405 shrunk = true;
1419 } 1406 }
1407
1408 return shrunk;
1420} 1409}
1421 1410
1422/** 1411/**
@@ -1439,24 +1428,17 @@ bool __radix_tree_delete_node(struct radix_tree_root *root,
1439 struct radix_tree_node *parent; 1428 struct radix_tree_node *parent;
1440 1429
1441 if (node->count) { 1430 if (node->count) {
1442 if (node == indirect_to_ptr(root->rnode)) { 1431 if (node == entry_to_node(root->rnode))
1443 radix_tree_shrink(root); 1432 deleted |= radix_tree_shrink(root);
1444 if (root->height == 0)
1445 deleted = true;
1446 }
1447 return deleted; 1433 return deleted;
1448 } 1434 }
1449 1435
1450 parent = node->parent; 1436 parent = node->parent;
1451 if (parent) { 1437 if (parent) {
1452 unsigned int offset; 1438 parent->slots[node->offset] = NULL;
1453
1454 offset = node->path >> RADIX_TREE_HEIGHT_SHIFT;
1455 parent->slots[offset] = NULL;
1456 parent->count--; 1439 parent->count--;
1457 } else { 1440 } else {
1458 root_tag_clear_all(root); 1441 root_tag_clear_all(root);
1459 root->height = 0;
1460 root->rnode = NULL; 1442 root->rnode = NULL;
1461 } 1443 }
1462 1444
@@ -1469,6 +1451,20 @@ bool __radix_tree_delete_node(struct radix_tree_root *root,
1469 return deleted; 1451 return deleted;
1470} 1452}
1471 1453
1454static inline void delete_sibling_entries(struct radix_tree_node *node,
1455 void *ptr, unsigned offset)
1456{
1457#ifdef CONFIG_RADIX_TREE_MULTIORDER
1458 int i;
1459 for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) {
1460 if (node->slots[offset + i] != ptr)
1461 break;
1462 node->slots[offset + i] = NULL;
1463 node->count--;
1464 }
1465#endif
1466}
1467
1472/** 1468/**
1473 * radix_tree_delete_item - delete an item from a radix tree 1469 * radix_tree_delete_item - delete an item from a radix tree
1474 * @root: radix tree root 1470 * @root: radix tree root
@@ -1484,7 +1480,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
1484 unsigned long index, void *item) 1480 unsigned long index, void *item)
1485{ 1481{
1486 struct radix_tree_node *node; 1482 struct radix_tree_node *node;
1487 unsigned int offset, i; 1483 unsigned int offset;
1488 void **slot; 1484 void **slot;
1489 void *entry; 1485 void *entry;
1490 int tag; 1486 int tag;
@@ -1502,24 +1498,13 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
1502 return entry; 1498 return entry;
1503 } 1499 }
1504 1500
1505 offset = index & RADIX_TREE_MAP_MASK; 1501 offset = get_slot_offset(node, slot);
1506 1502
1507 /* 1503 /* Clear all tags associated with the item to be deleted. */
1508 * Clear all tags associated with the item to be deleted. 1504 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1509 * This way of doing it would be inefficient, but seldom is any set. 1505 node_tag_clear(root, node, tag, offset);
1510 */
1511 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
1512 if (tag_get(node, tag, offset))
1513 radix_tree_tag_clear(root, index, tag);
1514 }
1515 1506
1516 /* Delete any sibling slots pointing to this slot */ 1507 delete_sibling_entries(node, node_to_entry(slot), offset);
1517 for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) {
1518 if (node->slots[offset + i] != ptr_to_indirect(slot))
1519 break;
1520 node->slots[offset + i] = NULL;
1521 node->count--;
1522 }
1523 node->slots[offset] = NULL; 1508 node->slots[offset] = NULL;
1524 node->count--; 1509 node->count--;
1525 1510
@@ -1544,6 +1529,28 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
1544} 1529}
1545EXPORT_SYMBOL(radix_tree_delete); 1530EXPORT_SYMBOL(radix_tree_delete);
1546 1531
1532struct radix_tree_node *radix_tree_replace_clear_tags(
1533 struct radix_tree_root *root,
1534 unsigned long index, void *entry)
1535{
1536 struct radix_tree_node *node;
1537 void **slot;
1538
1539 __radix_tree_lookup(root, index, &node, &slot);
1540
1541 if (node) {
1542 unsigned int tag, offset = get_slot_offset(node, slot);
1543 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1544 node_tag_clear(root, node, tag, offset);
1545 } else {
1546 /* Clear root node tags */
1547 root->gfp_mask &= __GFP_BITS_MASK;
1548 }
1549
1550 radix_tree_replace_slot(slot, entry);
1551 return node;
1552}
1553
1547/** 1554/**
1548 * radix_tree_tagged - test whether any items in the tree are tagged 1555 * radix_tree_tagged - test whether any items in the tree are tagged
1549 * @root: radix tree root 1556 * @root: radix tree root
@@ -1564,45 +1571,24 @@ radix_tree_node_ctor(void *arg)
1564 INIT_LIST_HEAD(&node->private_list); 1571 INIT_LIST_HEAD(&node->private_list);
1565} 1572}
1566 1573
1567static __init unsigned long __maxindex(unsigned int height)
1568{
1569 unsigned int width = height * RADIX_TREE_MAP_SHIFT;
1570 int shift = RADIX_TREE_INDEX_BITS - width;
1571
1572 if (shift < 0)
1573 return ~0UL;
1574 if (shift >= BITS_PER_LONG)
1575 return 0UL;
1576 return ~0UL >> shift;
1577}
1578
1579static __init void radix_tree_init_maxindex(void)
1580{
1581 unsigned int i;
1582
1583 for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
1584 height_to_maxindex[i] = __maxindex(i);
1585}
1586
1587static int radix_tree_callback(struct notifier_block *nfb, 1574static int radix_tree_callback(struct notifier_block *nfb,
1588 unsigned long action, 1575 unsigned long action, void *hcpu)
1589 void *hcpu)
1590{ 1576{
1591 int cpu = (long)hcpu; 1577 int cpu = (long)hcpu;
1592 struct radix_tree_preload *rtp; 1578 struct radix_tree_preload *rtp;
1593 struct radix_tree_node *node; 1579 struct radix_tree_node *node;
1594 1580
1595 /* Free per-cpu pool of perloaded nodes */ 1581 /* Free per-cpu pool of preloaded nodes */
1596 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { 1582 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
1597 rtp = &per_cpu(radix_tree_preloads, cpu); 1583 rtp = &per_cpu(radix_tree_preloads, cpu);
1598 while (rtp->nr) { 1584 while (rtp->nr) {
1599 node = rtp->nodes; 1585 node = rtp->nodes;
1600 rtp->nodes = node->private_data; 1586 rtp->nodes = node->private_data;
1601 kmem_cache_free(radix_tree_node_cachep, node); 1587 kmem_cache_free(radix_tree_node_cachep, node);
1602 rtp->nr--; 1588 rtp->nr--;
1603 } 1589 }
1604 } 1590 }
1605 return NOTIFY_OK; 1591 return NOTIFY_OK;
1606} 1592}
1607 1593
1608void __init radix_tree_init(void) 1594void __init radix_tree_init(void)
@@ -1611,6 +1597,5 @@ void __init radix_tree_init(void)
1611 sizeof(struct radix_tree_node), 0, 1597 sizeof(struct radix_tree_node), 0,
1612 SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, 1598 SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
1613 radix_tree_node_ctor); 1599 radix_tree_node_ctor);
1614 radix_tree_init_maxindex();
1615 hotcpu_notifier(radix_tree_callback, 0); 1600 hotcpu_notifier(radix_tree_callback, 0);
1616} 1601}
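The RADIX_TREE_RETRY handling in radix_tree_shrink() above relies on the existing rule that RCU readers must retry a lookup when a slot hands back an internal (indirect) pointer. A minimal reader-side sketch of that retry pattern, using the radix_tree_lookup_slot()/radix_tree_deref_slot()/radix_tree_deref_retry() helpers from <linux/radix-tree.h>; the function name is made up and this is not part of the commit:

static void *lockless_lookup_example(struct radix_tree_root *root,
				     unsigned long index)
{
	void **slot;
	void *entry;

	rcu_read_lock();
repeat:
	entry = NULL;
	slot = radix_tree_lookup_slot(root, index);
	if (slot) {
		entry = radix_tree_deref_slot(slot);
		/* A concurrent shrink left RADIX_TREE_RETRY here: start over. */
		if (radix_tree_deref_retry(entry))
			goto repeat;
	}
	rcu_read_unlock();
	return entry;
}

A real caller (e.g. pagecache) would take a reference on the entry before dropping the RCU read lock; the sketch only shows the retry step the comment in the hunk refers to.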
diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c
index 33840324138c..33f655ef48cd 100644
--- a/lib/strncpy_from_user.c
+++ b/lib/strncpy_from_user.c
@@ -1,5 +1,6 @@
1#include <linux/compiler.h> 1#include <linux/compiler.h>
2#include <linux/export.h> 2#include <linux/export.h>
3#include <linux/kasan-checks.h>
3#include <linux/uaccess.h> 4#include <linux/uaccess.h>
4#include <linux/kernel.h> 5#include <linux/kernel.h>
5#include <linux/errno.h> 6#include <linux/errno.h>
@@ -109,6 +110,7 @@ long strncpy_from_user(char *dst, const char __user *src, long count)
109 unsigned long max = max_addr - src_addr; 110 unsigned long max = max_addr - src_addr;
110 long retval; 111 long retval;
111 112
113 kasan_check_write(dst, count);
112 user_access_begin(); 114 user_access_begin();
113 retval = do_strncpy_from_user(dst, src, count, max); 115 retval = do_strncpy_from_user(dst, src, count, max);
114 user_access_end(); 116 user_access_end();
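The kasan_check_write(dst, count) added above validates the entire destination range before the copy begins, so an undersized kernel buffer is reported even when the user string would have fit. A hypothetical caller it would now flag; the function name, buffer size and limit are invented for illustration:

static long copy_name_example(const char __user *src)
{
	char *name = kmalloc(8, GFP_KERNEL);
	long ret;

	if (!name)
		return -ENOMEM;
	/*
	 * count (16) exceeds the 8-byte allocation: KASAN reports an
	 * out-of-bounds write before the copy itself runs.
	 */
	ret = strncpy_from_user(name, src, 16);
	kfree(name);
	return ret;
}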
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index 82169fbf2453..5e51872b3fc1 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -12,9 +12,12 @@
12#define pr_fmt(fmt) "kasan test: %s " fmt, __func__ 12#define pr_fmt(fmt) "kasan test: %s " fmt, __func__
13 13
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include <linux/mman.h>
16#include <linux/mm.h>
15#include <linux/printk.h> 17#include <linux/printk.h>
16#include <linux/slab.h> 18#include <linux/slab.h>
17#include <linux/string.h> 19#include <linux/string.h>
20#include <linux/uaccess.h>
18#include <linux/module.h> 21#include <linux/module.h>
19 22
20static noinline void __init kmalloc_oob_right(void) 23static noinline void __init kmalloc_oob_right(void)
@@ -344,6 +347,70 @@ static noinline void __init kasan_stack_oob(void)
344 *(volatile char *)p; 347 *(volatile char *)p;
345} 348}
346 349
350static noinline void __init ksize_unpoisons_memory(void)
351{
352 char *ptr;
353 size_t size = 123, real_size = size;
354
355 pr_info("ksize() unpoisons the whole allocated chunk\n");
356 ptr = kmalloc(size, GFP_KERNEL);
357 if (!ptr) {
358 pr_err("Allocation failed\n");
359 return;
360 }
361 real_size = ksize(ptr);
362 /* This access doesn't trigger an error. */
363 ptr[size] = 'x';
364 /* This one does. */
365 ptr[real_size] = 'y';
366 kfree(ptr);
367}
368
369static noinline void __init copy_user_test(void)
370{
371 char *kmem;
372 char __user *usermem;
373 size_t size = 10;
374 int unused;
375
376 kmem = kmalloc(size, GFP_KERNEL);
377 if (!kmem)
378 return;
379
380 usermem = (char __user *)vm_mmap(NULL, 0, PAGE_SIZE,
381 PROT_READ | PROT_WRITE | PROT_EXEC,
382 MAP_ANONYMOUS | MAP_PRIVATE, 0);
383 if (IS_ERR(usermem)) {
384 pr_err("Failed to allocate user memory\n");
385 kfree(kmem);
386 return;
387 }
388
389 pr_info("out-of-bounds in copy_from_user()\n");
390 unused = copy_from_user(kmem, usermem, size + 1);
391
392 pr_info("out-of-bounds in copy_to_user()\n");
393 unused = copy_to_user(usermem, kmem, size + 1);
394
395 pr_info("out-of-bounds in __copy_from_user()\n");
396 unused = __copy_from_user(kmem, usermem, size + 1);
397
398 pr_info("out-of-bounds in __copy_to_user()\n");
399 unused = __copy_to_user(usermem, kmem, size + 1);
400
401 pr_info("out-of-bounds in __copy_from_user_inatomic()\n");
402 unused = __copy_from_user_inatomic(kmem, usermem, size + 1);
403
404 pr_info("out-of-bounds in __copy_to_user_inatomic()\n");
405 unused = __copy_to_user_inatomic(usermem, kmem, size + 1);
406
407 pr_info("out-of-bounds in strncpy_from_user()\n");
408 unused = strncpy_from_user(kmem, usermem, size + 1);
409
410 vm_munmap((unsigned long)usermem, PAGE_SIZE);
411 kfree(kmem);
412}
413
347static int __init kmalloc_tests_init(void) 414static int __init kmalloc_tests_init(void)
348{ 415{
349 kmalloc_oob_right(); 416 kmalloc_oob_right();
@@ -367,6 +434,8 @@ static int __init kmalloc_tests_init(void)
367 kmem_cache_oob(); 434 kmem_cache_oob();
368 kasan_stack_oob(); 435 kasan_stack_oob();
369 kasan_global_oob(); 436 kasan_global_oob();
437 ksize_unpoisons_memory();
438 copy_user_test();
370 return -EAGAIN; 439 return -EAGAIN;
371} 440}
372 441
diff --git a/lib/uuid.c b/lib/uuid.c
index 398821e4dce1..e116ae5fa00f 100644
--- a/lib/uuid.c
+++ b/lib/uuid.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Unified UUID/GUID definition 2 * Unified UUID/GUID definition
3 * 3 *
4 * Copyright (C) 2009, Intel Corp. 4 * Copyright (C) 2009, 2016 Intel Corp.
5 * Huang Ying <ying.huang@intel.com> 5 * Huang Ying <ying.huang@intel.com>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
@@ -12,17 +12,40 @@
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details. 14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */ 15 */
20 16
21#include <linux/kernel.h> 17#include <linux/kernel.h>
18#include <linux/ctype.h>
19#include <linux/errno.h>
22#include <linux/export.h> 20#include <linux/export.h>
23#include <linux/uuid.h> 21#include <linux/uuid.h>
24#include <linux/random.h> 22#include <linux/random.h>
25 23
24const u8 uuid_le_index[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15};
25EXPORT_SYMBOL(uuid_le_index);
26const u8 uuid_be_index[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
27EXPORT_SYMBOL(uuid_be_index);
28
29/***************************************************************
30 * Random UUID interface
31 *
32 * Used here for a Boot ID, but can be useful for other kernel
33 * drivers.
34 ***************************************************************/
35
36/*
37 * Generate random UUID
38 */
39void generate_random_uuid(unsigned char uuid[16])
40{
41 get_random_bytes(uuid, 16);
42 /* Set UUID version to 4 --- truly random generation */
43 uuid[6] = (uuid[6] & 0x0F) | 0x40;
44 /* Set the UUID variant to DCE */
45 uuid[8] = (uuid[8] & 0x3F) | 0x80;
46}
47EXPORT_SYMBOL(generate_random_uuid);
48
26static void __uuid_gen_common(__u8 b[16]) 49static void __uuid_gen_common(__u8 b[16])
27{ 50{
28 prandom_bytes(b, 16); 51 prandom_bytes(b, 16);
@@ -45,3 +68,61 @@ void uuid_be_gen(uuid_be *bu)
45 bu->b[6] = (bu->b[6] & 0x0F) | 0x40; 68 bu->b[6] = (bu->b[6] & 0x0F) | 0x40;
46} 69}
47EXPORT_SYMBOL_GPL(uuid_be_gen); 70EXPORT_SYMBOL_GPL(uuid_be_gen);
71
72/**
73 * uuid_is_valid - checks if UUID string valid
74 * @uuid: UUID string to check
75 *
76 * Description:
77 * It checks if the UUID string is following the format:
78 * xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
79 * where x is a hex digit.
80 *
81 * Return: true if input is valid UUID string.
82 */
83bool uuid_is_valid(const char *uuid)
84{
85 unsigned int i;
86
87 for (i = 0; i < UUID_STRING_LEN; i++) {
88 if (i == 8 || i == 13 || i == 18 || i == 23) {
89 if (uuid[i] != '-')
90 return false;
91 } else if (!isxdigit(uuid[i])) {
92 return false;
93 }
94 }
95
96 return true;
97}
98EXPORT_SYMBOL(uuid_is_valid);
99
100static int __uuid_to_bin(const char *uuid, __u8 b[16], const u8 ei[16])
101{
102 static const u8 si[16] = {0,2,4,6,9,11,14,16,19,21,24,26,28,30,32,34};
103 unsigned int i;
104
105 if (!uuid_is_valid(uuid))
106 return -EINVAL;
107
108 for (i = 0; i < 16; i++) {
109 int hi = hex_to_bin(uuid[si[i]] + 0);
110 int lo = hex_to_bin(uuid[si[i]] + 1);
111
112 b[ei[i]] = (hi << 4) | lo;
113 }
114
115 return 0;
116}
117
118int uuid_le_to_bin(const char *uuid, uuid_le *u)
119{
120 return __uuid_to_bin(uuid, u->b, uuid_le_index);
121}
122EXPORT_SYMBOL(uuid_le_to_bin);
123
124int uuid_be_to_bin(const char *uuid, uuid_be *u)
125{
126 return __uuid_to_bin(uuid, u->b, uuid_be_index);
127}
128EXPORT_SYMBOL(uuid_be_to_bin);
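To show how the new string helpers fit together, here is a hedged round-trip sketch: generate a UUID, print it in canonical form with the existing %pUl format, then validate and parse it back with the helpers added above. The function name is made up; note that uuid_le_to_bin() already returns -EINVAL for malformed input, so the explicit uuid_is_valid() call is only illustrative:

static int uuid_roundtrip_example(void)
{
	char buf[UUID_STRING_LEN + 1];
	uuid_le u, parsed;

	uuid_le_gen(&u);
	snprintf(buf, sizeof(buf), "%pUl", u.b);	/* canonical string form */
	if (!uuid_is_valid(buf))
		return -EINVAL;
	return uuid_le_to_bin(buf, &parsed);		/* back to binary */
}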
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index ccb664b54280..0967771d8f7f 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -30,6 +30,7 @@
30#include <linux/ioport.h> 30#include <linux/ioport.h>
31#include <linux/dcache.h> 31#include <linux/dcache.h>
32#include <linux/cred.h> 32#include <linux/cred.h>
33#include <linux/uuid.h>
33#include <net/addrconf.h> 34#include <net/addrconf.h>
34#ifdef CONFIG_BLOCK 35#ifdef CONFIG_BLOCK
35#include <linux/blkdev.h> 36#include <linux/blkdev.h>
@@ -1304,19 +1305,17 @@ static noinline_for_stack
1304char *uuid_string(char *buf, char *end, const u8 *addr, 1305char *uuid_string(char *buf, char *end, const u8 *addr,
1305 struct printf_spec spec, const char *fmt) 1306 struct printf_spec spec, const char *fmt)
1306{ 1307{
1307 char uuid[sizeof("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")]; 1308 char uuid[UUID_STRING_LEN + 1];
1308 char *p = uuid; 1309 char *p = uuid;
1309 int i; 1310 int i;
1310 static const u8 be[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; 1311 const u8 *index = uuid_be_index;
1311 static const u8 le[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15};
1312 const u8 *index = be;
1313 bool uc = false; 1312 bool uc = false;
1314 1313
1315 switch (*(++fmt)) { 1314 switch (*(++fmt)) {
1316 case 'L': 1315 case 'L':
1317 uc = true; /* fall-through */ 1316 uc = true; /* fall-through */
1318 case 'l': 1317 case 'l':
1319 index = le; 1318 index = uuid_le_index;
1320 break; 1319 break;
1321 case 'B': 1320 case 'B':
1322 uc = true; 1321 uc = true;
@@ -1324,7 +1323,10 @@ char *uuid_string(char *buf, char *end, const u8 *addr,
1324 } 1323 }
1325 1324
1326 for (i = 0; i < 16; i++) { 1325 for (i = 0; i < 16; i++) {
1327 p = hex_byte_pack(p, addr[index[i]]); 1326 if (uc)
1327 p = hex_byte_pack_upper(p, addr[index[i]]);
1328 else
1329 p = hex_byte_pack(p, addr[index[i]]);
1328 switch (i) { 1330 switch (i) {
1329 case 3: 1331 case 3:
1330 case 5: 1332 case 5:
@@ -1337,13 +1339,6 @@ char *uuid_string(char *buf, char *end, const u8 *addr,
1337 1339
1338 *p = 0; 1340 *p = 0;
1339 1341
1340 if (uc) {
1341 p = uuid;
1342 do {
1343 *p = toupper(*p);
1344 } while (*(++p));
1345 }
1346
1347 return string(buf, end, uuid, spec); 1342 return string(buf, end, uuid, spec);
1348} 1343}
1349 1344
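With upper-casing now handled per byte inside the loop, uuid_string() still serves the same four %pU variants, keyed off the shared uuid_le_index/uuid_be_index tables. A small illustration of what each form selects, using generate_random_uuid() which this series exports; the function name is made up:

static void uuid_format_example(void)
{
	u8 id[16];

	generate_random_uuid(id);
	pr_info("%pUb\n", id);	/* memory order, lowercase (default index) */
	pr_info("%pUB\n", id);	/* memory order, uppercase */
	pr_info("%pUl\n", id);	/* first three fields byte-swapped, lowercase */
	pr_info("%pUL\n", id);	/* byte-swapped, uppercase */
}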