author      Matthew Wilcox <willy@infradead.org>    2017-11-03 13:30:42 -0400
committer   Matthew Wilcox <willy@infradead.org>    2018-09-29 22:47:49 -0400
commit      3159f943aafdbacb2f94c38fdaadabf2bbde2a14 (patch)
tree        7e06823a1ab7e90774535d17a217a939bdddda3b
parent      66ee620f06f99d72475db6eb638559ba608c7dee (diff)
xarray: Replace exceptional entries
Introduce xarray value entries and tagged pointers to replace radix tree
exceptional entries.  This is a slight change in encoding to allow the use
of an extra bit (we can now store BITS_PER_LONG - 1 bits in a value entry).
It is also a change in emphasis; exceptional entries are intimidating and
different.  As the comment explains, you can choose to store values or
pointers in the xarray and they are both first-class citizens.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Josef Bacik <jbacik@fb.com>
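For readers new to the encoding, here is a minimal sketch (not part of the patch) of how a value entry behaves with the xa_mk_value()/xa_to_value()/xa_is_value() helpers added in include/linux/xarray.h below; the tree name, index and stored value are illustrative only, and error handling is elided:

#include <linux/radix-tree.h>	/* pulls in the new linux/xarray.h helpers */
#include <linux/printk.h>

static RADIX_TREE(example_tree, GFP_KERNEL);	/* illustrative tree, not from the patch */

static void xarray_value_example(void)
{
	void *entry;

	/* xa_mk_value() shifts the integer left by one and sets bit 0. */
	radix_tree_insert(&example_tree, 0, xa_mk_value(42));

	entry = radix_tree_lookup(&example_tree, 0);
	if (xa_is_value(entry))		/* bit 0 set: a value, not a struct page pointer */
		pr_info("stored %lu\n", xa_to_value(entry));	/* prints 42 */
}

Because only one bit is consumed by the encoding, a value entry can hold BITS_PER_LONG - 1 bits, which is the extra bit the commit message refers to.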
-rw-r--r--   arch/powerpc/include/asm/book3s/64/pgtable.h     4
-rw-r--r--   arch/powerpc/include/asm/nohash/64/pgtable.h     4
-rw-r--r--   drivers/gpu/drm/i915/i915_gem.c                 17
-rw-r--r--   drivers/staging/erofs/utils.c                   18
-rw-r--r--   fs/btrfs/compression.c                           2
-rw-r--r--   fs/dax.c                                       112
-rw-r--r--   fs/proc/task_mmu.c                               2
-rw-r--r--   include/linux/radix-tree.h                      36
-rw-r--r--   include/linux/swapops.h                         19
-rw-r--r--   include/linux/xarray.h                         102
-rw-r--r--   lib/idr.c                                       60
-rw-r--r--   lib/radix-tree.c                                21
-rw-r--r--   mm/filemap.c                                    10
-rw-r--r--   mm/khugepaged.c                                  2
-rw-r--r--   mm/madvise.c                                     2
-rw-r--r--   mm/memcontrol.c                                  2
-rw-r--r--   mm/mincore.c                                     2
-rw-r--r--   mm/readahead.c                                   2
-rw-r--r--   mm/shmem.c                                      10
-rw-r--r--   mm/swap.c                                        2
-rw-r--r--   mm/truncate.c                                   12
-rw-r--r--   mm/workingset.c                                 13
-rw-r--r--   tools/testing/radix-tree/idr-test.c              6
-rw-r--r--   tools/testing/radix-tree/linux/radix-tree.h      1
-rw-r--r--   tools/testing/radix-tree/multiorder.c           47
-rw-r--r--   tools/testing/radix-tree/test.c                  2
26 files changed, 278 insertions, 232 deletions
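The tagged-pointer side of the change, which the erofs workgroup-tree conversion below relies on, works the same way but keeps the entry a pointer. A hedged sketch, again not part of the patch (struct foo, the tree and the tag value are illustrative):

struct foo { int dummy; };	/* any kmalloc'd object; its low two bits are free */

static RADIX_TREE(tag_tree, GFP_KERNEL);	/* illustrative tree */

static void xarray_tagged_pointer_example(struct foo *p)
{
	void *entry;

	/* Tags 0, 1 and 3 are available; the tag is stored in the pointer's low bits. */
	radix_tree_insert(&tag_tree, 0, xa_tag_pointer(p, 1));

	entry = radix_tree_lookup(&tag_tree, 0);
	pr_info("tag %u\n", xa_pointer_tag(entry));	/* prints 1 */
	p = xa_untag_pointer(entry);			/* back to the plain pointer */
}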
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 2fdc865ca374..62039e557ac0 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -723,9 +723,7 @@ static inline bool pte_user(pte_t pte)
723 BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \ 723 BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \
724 BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_SOFT_DIRTY); \ 724 BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_SOFT_DIRTY); \
725 } while (0) 725 } while (0)
726/* 726
727 * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT;
728 */
729#define SWP_TYPE_BITS 5 727#define SWP_TYPE_BITS 5
730#define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \ 728#define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \
731 & ((1UL << SWP_TYPE_BITS) - 1)) 729 & ((1UL << SWP_TYPE_BITS) - 1))
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 7cd6809f4d33..05765c2d2c1f 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -313,9 +313,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
313#define MAX_SWAPFILES_CHECK() do { \ 313#define MAX_SWAPFILES_CHECK() do { \
314 BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \ 314 BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \
315 } while (0) 315 } while (0)
316/* 316
317 * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT;
318 */
319#define SWP_TYPE_BITS 5 317#define SWP_TYPE_BITS 5
320#define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \ 318#define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \
321 & ((1UL << SWP_TYPE_BITS) - 1)) 319 & ((1UL << SWP_TYPE_BITS) - 1))
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fcc73a6ab503..316730b45f84 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5996,7 +5996,8 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
5996 count = __sg_page_count(sg); 5996 count = __sg_page_count(sg);
5997 5997
5998 while (idx + count <= n) { 5998 while (idx + count <= n) {
5999 unsigned long exception, i; 5999 void *entry;
6000 unsigned long i;
6000 int ret; 6001 int ret;
6001 6002
6002 /* If we cannot allocate and insert this entry, or the 6003 /* If we cannot allocate and insert this entry, or the
@@ -6011,12 +6012,9 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
6011 if (ret && ret != -EEXIST) 6012 if (ret && ret != -EEXIST)
6012 goto scan; 6013 goto scan;
6013 6014
6014 exception = 6015 entry = xa_mk_value(idx);
6015 RADIX_TREE_EXCEPTIONAL_ENTRY |
6016 idx << RADIX_TREE_EXCEPTIONAL_SHIFT;
6017 for (i = 1; i < count; i++) { 6016 for (i = 1; i < count; i++) {
6018 ret = radix_tree_insert(&iter->radix, idx + i, 6017 ret = radix_tree_insert(&iter->radix, idx + i, entry);
6019 (void *)exception);
6020 if (ret && ret != -EEXIST) 6018 if (ret && ret != -EEXIST)
6021 goto scan; 6019 goto scan;
6022 } 6020 }
@@ -6054,15 +6052,14 @@ lookup:
6054 GEM_BUG_ON(!sg); 6052 GEM_BUG_ON(!sg);
6055 6053
6056 /* If this index is in the middle of multi-page sg entry, 6054 /* If this index is in the middle of multi-page sg entry,
6057 * the radixtree will contain an exceptional entry that points 6055 * the radix tree will contain a value entry that points
6058 * to the start of that range. We will return the pointer to 6056 * to the start of that range. We will return the pointer to
6059 * the base page and the offset of this page within the 6057 * the base page and the offset of this page within the
6060 * sg entry's range. 6058 * sg entry's range.
6061 */ 6059 */
6062 *offset = 0; 6060 *offset = 0;
6063 if (unlikely(radix_tree_exception(sg))) { 6061 if (unlikely(xa_is_value(sg))) {
6064 unsigned long base = 6062 unsigned long base = xa_to_value(sg);
6065 (unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
6066 6063
6067 sg = radix_tree_lookup(&iter->radix, base); 6064 sg = radix_tree_lookup(&iter->radix, base);
6068 GEM_BUG_ON(!sg); 6065 GEM_BUG_ON(!sg);
diff --git a/drivers/staging/erofs/utils.c b/drivers/staging/erofs/utils.c
index 595cf90af9bb..bdee9bd09f11 100644
--- a/drivers/staging/erofs/utils.c
+++ b/drivers/staging/erofs/utils.c
@@ -35,7 +35,6 @@ static atomic_long_t erofs_global_shrink_cnt;
35 35
36#ifdef CONFIG_EROFS_FS_ZIP 36#ifdef CONFIG_EROFS_FS_ZIP
37 37
38/* radix_tree and the future XArray both don't use tagptr_t yet */
39struct erofs_workgroup *erofs_find_workgroup( 38struct erofs_workgroup *erofs_find_workgroup(
40 struct super_block *sb, pgoff_t index, bool *tag) 39 struct super_block *sb, pgoff_t index, bool *tag)
41{ 40{
@@ -47,9 +46,8 @@ repeat:
47 rcu_read_lock(); 46 rcu_read_lock();
48 grp = radix_tree_lookup(&sbi->workstn_tree, index); 47 grp = radix_tree_lookup(&sbi->workstn_tree, index);
49 if (grp != NULL) { 48 if (grp != NULL) {
50 *tag = radix_tree_exceptional_entry(grp); 49 *tag = xa_pointer_tag(grp);
51 grp = (void *)((unsigned long)grp & 50 grp = xa_untag_pointer(grp);
52 ~RADIX_TREE_EXCEPTIONAL_ENTRY);
53 51
54 if (erofs_workgroup_get(grp, &oldcount)) { 52 if (erofs_workgroup_get(grp, &oldcount)) {
55 /* prefer to relax rcu read side */ 53 /* prefer to relax rcu read side */
@@ -83,9 +81,7 @@ int erofs_register_workgroup(struct super_block *sb,
83 sbi = EROFS_SB(sb); 81 sbi = EROFS_SB(sb);
84 erofs_workstn_lock(sbi); 82 erofs_workstn_lock(sbi);
85 83
86 if (tag) 84 grp = xa_tag_pointer(grp, tag);
87 grp = (void *)((unsigned long)grp |
88 1UL << RADIX_TREE_EXCEPTIONAL_SHIFT);
89 85
90 err = radix_tree_insert(&sbi->workstn_tree, 86 err = radix_tree_insert(&sbi->workstn_tree,
91 grp->index, grp); 87 grp->index, grp);
@@ -131,9 +127,7 @@ repeat:
131 127
132 for (i = 0; i < found; ++i) { 128 for (i = 0; i < found; ++i) {
133 int cnt; 129 int cnt;
134 struct erofs_workgroup *grp = (void *) 130 struct erofs_workgroup *grp = xa_untag_pointer(batch[i]);
135 ((unsigned long)batch[i] &
136 ~RADIX_TREE_EXCEPTIONAL_ENTRY);
137 131
138 first_index = grp->index + 1; 132 first_index = grp->index + 1;
139 133
@@ -150,8 +144,8 @@ repeat:
150#endif 144#endif
151 continue; 145 continue;
152 146
153 if (radix_tree_delete(&sbi->workstn_tree, 147 if (xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
154 grp->index) != grp) { 148 grp->index)) != grp) {
155#ifdef EROFS_FS_HAS_MANAGED_CACHE 149#ifdef EROFS_FS_HAS_MANAGED_CACHE
156skip: 150skip:
157 erofs_workgroup_unfreeze(grp, 1); 151 erofs_workgroup_unfreeze(grp, 1);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 9bfa66592aa7..fd25e125303c 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -440,7 +440,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
440 rcu_read_lock(); 440 rcu_read_lock();
441 page = radix_tree_lookup(&mapping->i_pages, pg_index); 441 page = radix_tree_lookup(&mapping->i_pages, pg_index);
442 rcu_read_unlock(); 442 rcu_read_unlock();
443 if (page && !radix_tree_exceptional_entry(page)) { 443 if (page && !xa_is_value(page)) {
444 misses++; 444 misses++;
445 if (misses > 4) 445 if (misses > 4)
446 break; 446 break;
diff --git a/fs/dax.c b/fs/dax.c
index b68ce484e1be..ebcec36335eb 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -59,56 +59,57 @@ static int __init init_dax_wait_table(void)
59fs_initcall(init_dax_wait_table); 59fs_initcall(init_dax_wait_table);
60 60
61/* 61/*
62 * We use lowest available bit in exceptional entry for locking, one bit for 62 * DAX pagecache entries use XArray value entries so they can't be mistaken
63 * the entry size (PMD) and two more to tell us if the entry is a zero page or 63 * for pages. We use one bit for locking, one bit for the entry size (PMD)
64 * an empty entry that is just used for locking. In total four special bits. 64 * and two more to tell us if the entry is a zero page or an empty entry that
65 * is just used for locking. In total four special bits.
65 * 66 *
66 * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the ZERO_PAGE 67 * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the ZERO_PAGE
67 * and EMPTY bits aren't set the entry is a normal DAX entry with a filesystem 68 * and EMPTY bits aren't set the entry is a normal DAX entry with a filesystem
68 * block allocation. 69 * block allocation.
69 */ 70 */
70#define RADIX_DAX_SHIFT (RADIX_TREE_EXCEPTIONAL_SHIFT + 4) 71#define DAX_SHIFT (4)
71#define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT) 72#define DAX_LOCKED (1UL << 0)
72#define RADIX_DAX_PMD (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1)) 73#define DAX_PMD (1UL << 1)
73#define RADIX_DAX_ZERO_PAGE (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2)) 74#define DAX_ZERO_PAGE (1UL << 2)
74#define RADIX_DAX_EMPTY (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3)) 75#define DAX_EMPTY (1UL << 3)
75 76
76static unsigned long dax_radix_pfn(void *entry) 77static unsigned long dax_radix_pfn(void *entry)
77{ 78{
78 return (unsigned long)entry >> RADIX_DAX_SHIFT; 79 return xa_to_value(entry) >> DAX_SHIFT;
79} 80}
80 81
81static void *dax_radix_locked_entry(unsigned long pfn, unsigned long flags) 82static void *dax_radix_locked_entry(unsigned long pfn, unsigned long flags)
82{ 83{
83 return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags | 84 return xa_mk_value(flags | ((unsigned long)pfn << DAX_SHIFT) |
84 (pfn << RADIX_DAX_SHIFT) | RADIX_DAX_ENTRY_LOCK); 85 DAX_LOCKED);
85} 86}
86 87
87static unsigned int dax_radix_order(void *entry) 88static unsigned int dax_radix_order(void *entry)
88{ 89{
89 if ((unsigned long)entry & RADIX_DAX_PMD) 90 if (xa_to_value(entry) & DAX_PMD)
90 return PMD_SHIFT - PAGE_SHIFT; 91 return PMD_SHIFT - PAGE_SHIFT;
91 return 0; 92 return 0;
92} 93}
93 94
94static int dax_is_pmd_entry(void *entry) 95static int dax_is_pmd_entry(void *entry)
95{ 96{
96 return (unsigned long)entry & RADIX_DAX_PMD; 97 return xa_to_value(entry) & DAX_PMD;
97} 98}
98 99
99static int dax_is_pte_entry(void *entry) 100static int dax_is_pte_entry(void *entry)
100{ 101{
101 return !((unsigned long)entry & RADIX_DAX_PMD); 102 return !(xa_to_value(entry) & DAX_PMD);
102} 103}
103 104
104static int dax_is_zero_entry(void *entry) 105static int dax_is_zero_entry(void *entry)
105{ 106{
106 return (unsigned long)entry & RADIX_DAX_ZERO_PAGE; 107 return xa_to_value(entry) & DAX_ZERO_PAGE;
107} 108}
108 109
109static int dax_is_empty_entry(void *entry) 110static int dax_is_empty_entry(void *entry)
110{ 111{
111 return (unsigned long)entry & RADIX_DAX_EMPTY; 112 return xa_to_value(entry) & DAX_EMPTY;
112} 113}
113 114
114/* 115/*
@@ -186,9 +187,9 @@ static void dax_wake_mapping_entry_waiter(struct address_space *mapping,
186 */ 187 */
187static inline int slot_locked(struct address_space *mapping, void **slot) 188static inline int slot_locked(struct address_space *mapping, void **slot)
188{ 189{
189 unsigned long entry = (unsigned long) 190 unsigned long entry = xa_to_value(
190 radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock); 191 radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock));
191 return entry & RADIX_DAX_ENTRY_LOCK; 192 return entry & DAX_LOCKED;
192} 193}
193 194
194/* 195/*
@@ -196,12 +197,11 @@ static inline int slot_locked(struct address_space *mapping, void **slot)
196 */ 197 */
197static inline void *lock_slot(struct address_space *mapping, void **slot) 198static inline void *lock_slot(struct address_space *mapping, void **slot)
198{ 199{
199 unsigned long entry = (unsigned long) 200 unsigned long v = xa_to_value(
200 radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock); 201 radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock));
201 202 void *entry = xa_mk_value(v | DAX_LOCKED);
202 entry |= RADIX_DAX_ENTRY_LOCK; 203 radix_tree_replace_slot(&mapping->i_pages, slot, entry);
203 radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry); 204 return entry;
204 return (void *)entry;
205} 205}
206 206
207/* 207/*
@@ -209,17 +209,16 @@ static inline void *lock_slot(struct address_space *mapping, void **slot)
209 */ 209 */
210static inline void *unlock_slot(struct address_space *mapping, void **slot) 210static inline void *unlock_slot(struct address_space *mapping, void **slot)
211{ 211{
212 unsigned long entry = (unsigned long) 212 unsigned long v = xa_to_value(
213 radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock); 213 radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock));
214 214 void *entry = xa_mk_value(v & ~DAX_LOCKED);
215 entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK; 215 radix_tree_replace_slot(&mapping->i_pages, slot, entry);
216 radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry); 216 return entry;
217 return (void *)entry;
218} 217}
219 218
220/* 219/*
221 * Lookup entry in radix tree, wait for it to become unlocked if it is 220 * Lookup entry in radix tree, wait for it to become unlocked if it is
222 * exceptional entry and return it. The caller must call 221 * a DAX entry and return it. The caller must call
223 * put_unlocked_mapping_entry() when he decided not to lock the entry or 222 * put_unlocked_mapping_entry() when he decided not to lock the entry or
224 * put_locked_mapping_entry() when he locked the entry and now wants to 223 * put_locked_mapping_entry() when he locked the entry and now wants to
225 * unlock it. 224 * unlock it.
@@ -242,7 +241,7 @@ static void *__get_unlocked_mapping_entry(struct address_space *mapping,
242 entry = __radix_tree_lookup(&mapping->i_pages, index, NULL, 241 entry = __radix_tree_lookup(&mapping->i_pages, index, NULL,
243 &slot); 242 &slot);
244 if (!entry || 243 if (!entry ||
245 WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)) || 244 WARN_ON_ONCE(!xa_is_value(entry)) ||
246 !slot_locked(mapping, slot)) { 245 !slot_locked(mapping, slot)) {
247 if (slotp) 246 if (slotp)
248 *slotp = slot; 247 *slotp = slot;
@@ -283,7 +282,7 @@ static void unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
283 282
284 xa_lock_irq(&mapping->i_pages); 283 xa_lock_irq(&mapping->i_pages);
285 entry = __radix_tree_lookup(&mapping->i_pages, index, NULL, &slot); 284 entry = __radix_tree_lookup(&mapping->i_pages, index, NULL, &slot);
286 if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry) || 285 if (WARN_ON_ONCE(!entry || !xa_is_value(entry) ||
287 !slot_locked(mapping, slot))) { 286 !slot_locked(mapping, slot))) {
288 xa_unlock_irq(&mapping->i_pages); 287 xa_unlock_irq(&mapping->i_pages);
289 return; 288 return;
@@ -472,12 +471,11 @@ void dax_unlock_mapping_entry(struct page *page)
472} 471}
473 472
474/* 473/*
475 * Find radix tree entry at given index. If it points to an exceptional entry, 474 * Find radix tree entry at given index. If it is a DAX entry, return it
476 * return it with the radix tree entry locked. If the radix tree doesn't 475 * with the radix tree entry locked. If the radix tree doesn't contain the
477 * contain given index, create an empty exceptional entry for the index and 476 * given index, create an empty entry for the index and return with it locked.
478 * return with it locked.
479 * 477 *
480 * When requesting an entry with size RADIX_DAX_PMD, grab_mapping_entry() will 478 * When requesting an entry with size DAX_PMD, grab_mapping_entry() will
481 * either return that locked entry or will return an error. This error will 479 * either return that locked entry or will return an error. This error will
482 * happen if there are any 4k entries within the 2MiB range that we are 480 * happen if there are any 4k entries within the 2MiB range that we are
483 * requesting. 481 * requesting.
@@ -507,13 +505,13 @@ restart:
507 xa_lock_irq(&mapping->i_pages); 505 xa_lock_irq(&mapping->i_pages);
508 entry = get_unlocked_mapping_entry(mapping, index, &slot); 506 entry = get_unlocked_mapping_entry(mapping, index, &slot);
509 507
510 if (WARN_ON_ONCE(entry && !radix_tree_exceptional_entry(entry))) { 508 if (WARN_ON_ONCE(entry && !xa_is_value(entry))) {
511 entry = ERR_PTR(-EIO); 509 entry = ERR_PTR(-EIO);
512 goto out_unlock; 510 goto out_unlock;
513 } 511 }
514 512
515 if (entry) { 513 if (entry) {
516 if (size_flag & RADIX_DAX_PMD) { 514 if (size_flag & DAX_PMD) {
517 if (dax_is_pte_entry(entry)) { 515 if (dax_is_pte_entry(entry)) {
518 put_unlocked_mapping_entry(mapping, index, 516 put_unlocked_mapping_entry(mapping, index,
519 entry); 517 entry);
@@ -584,7 +582,7 @@ restart:
584 true); 582 true);
585 } 583 }
586 584
587 entry = dax_radix_locked_entry(0, size_flag | RADIX_DAX_EMPTY); 585 entry = dax_radix_locked_entry(0, size_flag | DAX_EMPTY);
588 586
589 err = __radix_tree_insert(&mapping->i_pages, index, 587 err = __radix_tree_insert(&mapping->i_pages, index,
590 dax_radix_order(entry), entry); 588 dax_radix_order(entry), entry);
@@ -673,8 +671,7 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
673 if (index >= end) 671 if (index >= end)
674 break; 672 break;
675 673
676 if (WARN_ON_ONCE( 674 if (WARN_ON_ONCE(!xa_is_value(pvec_ent)))
677 !radix_tree_exceptional_entry(pvec_ent)))
678 continue; 675 continue;
679 676
680 xa_lock_irq(&mapping->i_pages); 677 xa_lock_irq(&mapping->i_pages);
@@ -713,7 +710,7 @@ static int __dax_invalidate_mapping_entry(struct address_space *mapping,
713 710
714 xa_lock_irq(pages); 711 xa_lock_irq(pages);
715 entry = get_unlocked_mapping_entry(mapping, index, NULL); 712 entry = get_unlocked_mapping_entry(mapping, index, NULL);
716 if (!entry || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry))) 713 if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
717 goto out; 714 goto out;
718 if (!trunc && 715 if (!trunc &&
719 (radix_tree_tag_get(pages, index, PAGECACHE_TAG_DIRTY) || 716 (radix_tree_tag_get(pages, index, PAGECACHE_TAG_DIRTY) ||
@@ -729,8 +726,8 @@ out:
729 return ret; 726 return ret;
730} 727}
731/* 728/*
732 * Delete exceptional DAX entry at @index from @mapping. Wait for radix tree 729 * Delete DAX entry at @index from @mapping. Wait for it
733 * entry to get unlocked before deleting it. 730 * to be unlocked before deleting it.
734 */ 731 */
735int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index) 732int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
736{ 733{
@@ -740,7 +737,7 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
740 * This gets called from truncate / punch_hole path. As such, the caller 737 * This gets called from truncate / punch_hole path. As such, the caller
741 * must hold locks protecting against concurrent modifications of the 738 * must hold locks protecting against concurrent modifications of the
742 * radix tree (usually fs-private i_mmap_sem for writing). Since the 739 * radix tree (usually fs-private i_mmap_sem for writing). Since the
743 * caller has seen exceptional entry for this index, we better find it 740 * caller has seen a DAX entry for this index, we better find it
744 * at that index as well... 741 * at that index as well...
745 */ 742 */
746 WARN_ON_ONCE(!ret); 743 WARN_ON_ONCE(!ret);
@@ -748,7 +745,7 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
748} 745}
749 746
750/* 747/*
751 * Invalidate exceptional DAX entry if it is clean. 748 * Invalidate DAX entry if it is clean.
752 */ 749 */
753int dax_invalidate_mapping_entry_sync(struct address_space *mapping, 750int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
754 pgoff_t index) 751 pgoff_t index)
@@ -802,7 +799,7 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
802 if (dirty) 799 if (dirty)
803 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); 800 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
804 801
805 if (dax_is_zero_entry(entry) && !(flags & RADIX_DAX_ZERO_PAGE)) { 802 if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) {
806 /* we are replacing a zero page with block mapping */ 803 /* we are replacing a zero page with block mapping */
807 if (dax_is_pmd_entry(entry)) 804 if (dax_is_pmd_entry(entry))
808 unmap_mapping_pages(mapping, index & ~PG_PMD_COLOUR, 805 unmap_mapping_pages(mapping, index & ~PG_PMD_COLOUR,
@@ -940,13 +937,13 @@ static int dax_writeback_one(struct dax_device *dax_dev,
940 * A page got tagged dirty in DAX mapping? Something is seriously 937 * A page got tagged dirty in DAX mapping? Something is seriously
941 * wrong. 938 * wrong.
942 */ 939 */
943 if (WARN_ON(!radix_tree_exceptional_entry(entry))) 940 if (WARN_ON(!xa_is_value(entry)))
944 return -EIO; 941 return -EIO;
945 942
946 xa_lock_irq(pages); 943 xa_lock_irq(pages);
947 entry2 = get_unlocked_mapping_entry(mapping, index, &slot); 944 entry2 = get_unlocked_mapping_entry(mapping, index, &slot);
948 /* Entry got punched out / reallocated? */ 945 /* Entry got punched out / reallocated? */
949 if (!entry2 || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry2))) 946 if (!entry2 || WARN_ON_ONCE(!xa_is_value(entry2)))
950 goto put_unlocked; 947 goto put_unlocked;
951 /* 948 /*
952 * Entry got reallocated elsewhere? No need to writeback. We have to 949 * Entry got reallocated elsewhere? No need to writeback. We have to
@@ -1123,8 +1120,9 @@ static vm_fault_t dax_load_hole(struct address_space *mapping, void *entry,
1123 pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr)); 1120 pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr));
1124 vm_fault_t ret; 1121 vm_fault_t ret;
1125 1122
1126 dax_insert_mapping_entry(mapping, vmf, entry, pfn, RADIX_DAX_ZERO_PAGE, 1123 dax_insert_mapping_entry(mapping, vmf, entry, pfn,
1127 false); 1124 DAX_ZERO_PAGE, false);
1125
1128 ret = vmf_insert_mixed(vmf->vma, vaddr, pfn); 1126 ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
1129 trace_dax_load_hole(inode, vmf, ret); 1127 trace_dax_load_hole(inode, vmf, ret);
1130 return ret; 1128 return ret;
@@ -1514,7 +1512,7 @@ static vm_fault_t dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
1514 1512
1515 pfn = page_to_pfn_t(zero_page); 1513 pfn = page_to_pfn_t(zero_page);
1516 ret = dax_insert_mapping_entry(mapping, vmf, entry, pfn, 1514 ret = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
1517 RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false); 1515 DAX_PMD | DAX_ZERO_PAGE, false);
1518 1516
1519 ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd); 1517 ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
1520 if (!pmd_none(*(vmf->pmd))) { 1518 if (!pmd_none(*(vmf->pmd))) {
@@ -1597,7 +1595,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
1597 * is already in the tree, for instance), it will return -EEXIST and 1595 * is already in the tree, for instance), it will return -EEXIST and
1598 * we just fall back to 4k entries. 1596 * we just fall back to 4k entries.
1599 */ 1597 */
1600 entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD); 1598 entry = grab_mapping_entry(mapping, pgoff, DAX_PMD);
1601 if (IS_ERR(entry)) 1599 if (IS_ERR(entry))
1602 goto fallback; 1600 goto fallback;
1603 1601
@@ -1635,7 +1633,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
1635 goto finish_iomap; 1633 goto finish_iomap;
1636 1634
1637 entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn, 1635 entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
1638 RADIX_DAX_PMD, write && !sync); 1636 DAX_PMD, write && !sync);
1639 1637
1640 /* 1638 /*
1641 * If we are doing synchronous page fault and inode needs fsync, 1639 * If we are doing synchronous page fault and inode needs fsync,
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 5ea1d64cb0b4..669abb617321 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -521,7 +521,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
521 if (!page) 521 if (!page)
522 return; 522 return;
523 523
524 if (radix_tree_exceptional_entry(page)) 524 if (xa_is_value(page))
525 mss->swap += PAGE_SIZE; 525 mss->swap += PAGE_SIZE;
526 else 526 else
527 put_page(page); 527 put_page(page);
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 34149e8b5f73..e9e76ab4fbbf 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -28,34 +28,26 @@
28#include <linux/rcupdate.h> 28#include <linux/rcupdate.h>
29#include <linux/spinlock.h> 29#include <linux/spinlock.h>
30#include <linux/types.h> 30#include <linux/types.h>
31#include <linux/xarray.h>
31 32
32/* 33/*
33 * The bottom two bits of the slot determine how the remaining bits in the 34 * The bottom two bits of the slot determine how the remaining bits in the
34 * slot are interpreted: 35 * slot are interpreted:
35 * 36 *
36 * 00 - data pointer 37 * 00 - data pointer
37 * 01 - internal entry 38 * 10 - internal entry
38 * 10 - exceptional entry 39 * x1 - value entry
39 * 11 - this bit combination is currently unused/reserved
40 * 40 *
41 * The internal entry may be a pointer to the next level in the tree, a 41 * The internal entry may be a pointer to the next level in the tree, a
42 * sibling entry, or an indicator that the entry in this slot has been moved 42 * sibling entry, or an indicator that the entry in this slot has been moved
43 * to another location in the tree and the lookup should be restarted. While 43 * to another location in the tree and the lookup should be restarted. While
44 * NULL fits the 'data pointer' pattern, it means that there is no entry in 44 * NULL fits the 'data pointer' pattern, it means that there is no entry in
45 * the tree for this index (no matter what level of the tree it is found at). 45 * the tree for this index (no matter what level of the tree it is found at).
46 * This means that you cannot store NULL in the tree as a value for the index. 46 * This means that storing a NULL entry in the tree is the same as deleting
47 * the entry from the tree.
47 */ 48 */
48#define RADIX_TREE_ENTRY_MASK 3UL 49#define RADIX_TREE_ENTRY_MASK 3UL
49#define RADIX_TREE_INTERNAL_NODE 1UL 50#define RADIX_TREE_INTERNAL_NODE 2UL
50
51/*
52 * Most users of the radix tree store pointers but shmem/tmpfs stores swap
53 * entries in the same tree. They are marked as exceptional entries to
54 * distinguish them from pointers to struct page.
55 * EXCEPTIONAL_ENTRY tests the bit, EXCEPTIONAL_SHIFT shifts content past it.
56 */
57#define RADIX_TREE_EXCEPTIONAL_ENTRY 2
58#define RADIX_TREE_EXCEPTIONAL_SHIFT 2
59 51
60static inline bool radix_tree_is_internal_node(void *ptr) 52static inline bool radix_tree_is_internal_node(void *ptr)
61{ 53{
@@ -83,11 +75,10 @@ static inline bool radix_tree_is_internal_node(void *ptr)
83 75
84/* 76/*
85 * @count is the count of every non-NULL element in the ->slots array 77 * @count is the count of every non-NULL element in the ->slots array
86 * whether that is an exceptional entry, a retry entry, a user pointer, 78 * whether that is a value entry, a retry entry, a user pointer,
87 * a sibling entry or a pointer to the next level of the tree. 79 * a sibling entry or a pointer to the next level of the tree.
88 * @exceptional is the count of every element in ->slots which is 80 * @exceptional is the count of every element in ->slots which is
89 * either radix_tree_exceptional_entry() or is a sibling entry for an 81 * either a value entry or a sibling of a value entry.
90 * exceptional entry.
91 */ 82 */
92struct radix_tree_node { 83struct radix_tree_node {
93 unsigned char shift; /* Bits remaining in each slot */ 84 unsigned char shift; /* Bits remaining in each slot */
@@ -269,17 +260,6 @@ static inline int radix_tree_deref_retry(void *arg)
269} 260}
270 261
271/** 262/**
272 * radix_tree_exceptional_entry - radix_tree_deref_slot gave exceptional entry?
273 * @arg: value returned by radix_tree_deref_slot
274 * Returns: 0 if well-aligned pointer, non-0 if exceptional entry.
275 */
276static inline int radix_tree_exceptional_entry(void *arg)
277{
278 /* Not unlikely because radix_tree_exception often tested first */
279 return (unsigned long)arg & RADIX_TREE_EXCEPTIONAL_ENTRY;
280}
281
282/**
283 * radix_tree_exception - radix_tree_deref_slot returned either exception? 263 * radix_tree_exception - radix_tree_deref_slot returned either exception?
284 * @arg: value returned by radix_tree_deref_slot 264 * @arg: value returned by radix_tree_deref_slot
285 * Returns: 0 if well-aligned pointer, non-0 if either kind of exception. 265 * Returns: 0 if well-aligned pointer, non-0 if either kind of exception.
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 22af9d8a84ae..4d961668e5fc 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -18,9 +18,8 @@
18 * 18 *
19 * swp_entry_t's are *never* stored anywhere in their arch-dependent format. 19 * swp_entry_t's are *never* stored anywhere in their arch-dependent format.
20 */ 20 */
21#define SWP_TYPE_SHIFT(e) ((sizeof(e.val) * 8) - \ 21#define SWP_TYPE_SHIFT (BITS_PER_XA_VALUE - MAX_SWAPFILES_SHIFT)
22 (MAX_SWAPFILES_SHIFT + RADIX_TREE_EXCEPTIONAL_SHIFT)) 22#define SWP_OFFSET_MASK ((1UL << SWP_TYPE_SHIFT) - 1)
23#define SWP_OFFSET_MASK(e) ((1UL << SWP_TYPE_SHIFT(e)) - 1)
24 23
25/* 24/*
26 * Store a type+offset into a swp_entry_t in an arch-independent format 25 * Store a type+offset into a swp_entry_t in an arch-independent format
@@ -29,8 +28,7 @@ static inline swp_entry_t swp_entry(unsigned long type, pgoff_t offset)
29{ 28{
30 swp_entry_t ret; 29 swp_entry_t ret;
31 30
32 ret.val = (type << SWP_TYPE_SHIFT(ret)) | 31 ret.val = (type << SWP_TYPE_SHIFT) | (offset & SWP_OFFSET_MASK);
33 (offset & SWP_OFFSET_MASK(ret));
34 return ret; 32 return ret;
35} 33}
36 34
@@ -40,7 +38,7 @@ static inline swp_entry_t swp_entry(unsigned long type, pgoff_t offset)
40 */ 38 */
41static inline unsigned swp_type(swp_entry_t entry) 39static inline unsigned swp_type(swp_entry_t entry)
42{ 40{
43 return (entry.val >> SWP_TYPE_SHIFT(entry)); 41 return (entry.val >> SWP_TYPE_SHIFT);
44} 42}
45 43
46/* 44/*
@@ -49,7 +47,7 @@ static inline unsigned swp_type(swp_entry_t entry)
49 */ 47 */
50static inline pgoff_t swp_offset(swp_entry_t entry) 48static inline pgoff_t swp_offset(swp_entry_t entry)
51{ 49{
52 return entry.val & SWP_OFFSET_MASK(entry); 50 return entry.val & SWP_OFFSET_MASK;
53} 51}
54 52
55#ifdef CONFIG_MMU 53#ifdef CONFIG_MMU
@@ -90,16 +88,13 @@ static inline swp_entry_t radix_to_swp_entry(void *arg)
90{ 88{
91 swp_entry_t entry; 89 swp_entry_t entry;
92 90
93 entry.val = (unsigned long)arg >> RADIX_TREE_EXCEPTIONAL_SHIFT; 91 entry.val = xa_to_value(arg);
94 return entry; 92 return entry;
95} 93}
96 94
97static inline void *swp_to_radix_entry(swp_entry_t entry) 95static inline void *swp_to_radix_entry(swp_entry_t entry)
98{ 96{
99 unsigned long value; 97 return xa_mk_value(entry.val);
100
101 value = entry.val << RADIX_TREE_EXCEPTIONAL_SHIFT;
102 return (void *)(value | RADIX_TREE_EXCEPTIONAL_ENTRY);
103} 98}
104 99
105#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) 100#if IS_ENABLED(CONFIG_DEVICE_PRIVATE)
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 9e4c86853fa4..5c8acfc4ff55 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -5,9 +5,111 @@
5 * eXtensible Arrays 5 * eXtensible Arrays
6 * Copyright (c) 2017 Microsoft Corporation 6 * Copyright (c) 2017 Microsoft Corporation
7 * Author: Matthew Wilcox <willy@infradead.org> 7 * Author: Matthew Wilcox <willy@infradead.org>
8 *
9 * See Documentation/core-api/xarray.rst for how to use the XArray.
8 */ 10 */
9 11
12#include <linux/bug.h>
10#include <linux/spinlock.h> 13#include <linux/spinlock.h>
14#include <linux/types.h>
15
16/*
17 * The bottom two bits of the entry determine how the XArray interprets
18 * the contents:
19 *
20 * 00: Pointer entry
21 * 10: Internal entry
22 * x1: Value entry or tagged pointer
23 *
24 * Attempting to store internal entries in the XArray is a bug.
25 */
26
27#define BITS_PER_XA_VALUE (BITS_PER_LONG - 1)
28
29/**
30 * xa_mk_value() - Create an XArray entry from an integer.
31 * @v: Value to store in XArray.
32 *
33 * Context: Any context.
34 * Return: An entry suitable for storing in the XArray.
35 */
36static inline void *xa_mk_value(unsigned long v)
37{
38 WARN_ON((long)v < 0);
39 return (void *)((v << 1) | 1);
40}
41
42/**
43 * xa_to_value() - Get value stored in an XArray entry.
44 * @entry: XArray entry.
45 *
46 * Context: Any context.
47 * Return: The value stored in the XArray entry.
48 */
49static inline unsigned long xa_to_value(const void *entry)
50{
51 return (unsigned long)entry >> 1;
52}
53
54/**
55 * xa_is_value() - Determine if an entry is a value.
56 * @entry: XArray entry.
57 *
58 * Context: Any context.
59 * Return: True if the entry is a value, false if it is a pointer.
60 */
61static inline bool xa_is_value(const void *entry)
62{
63 return (unsigned long)entry & 1;
64}
65
66/**
67 * xa_tag_pointer() - Create an XArray entry for a tagged pointer.
68 * @p: Plain pointer.
69 * @tag: Tag value (0, 1 or 3).
70 *
71 * If the user of the XArray prefers, they can tag their pointers instead
72 * of storing value entries. Three tags are available (0, 1 and 3).
73 * These are distinct from the xa_mark_t as they are not replicated up
74 * through the array and cannot be searched for.
75 *
76 * Context: Any context.
77 * Return: An XArray entry.
78 */
79static inline void *xa_tag_pointer(void *p, unsigned long tag)
80{
81 return (void *)((unsigned long)p | tag);
82}
83
84/**
85 * xa_untag_pointer() - Turn an XArray entry into a plain pointer.
86 * @entry: XArray entry.
87 *
88 * If you have stored a tagged pointer in the XArray, call this function
89 * to get the untagged version of the pointer.
90 *
91 * Context: Any context.
92 * Return: A pointer.
93 */
94static inline void *xa_untag_pointer(void *entry)
95{
96 return (void *)((unsigned long)entry & ~3UL);
97}
98
99/**
100 * xa_pointer_tag() - Get the tag stored in an XArray entry.
101 * @entry: XArray entry.
102 *
103 * If you have stored a tagged pointer in the XArray, call this function
104 * to get the tag of that pointer.
105 *
106 * Context: Any context.
107 * Return: A tag.
108 */
109static inline unsigned int xa_pointer_tag(void *entry)
110{
111 return (unsigned long)entry & 3UL;
112}
11 113
12#define xa_trylock(xa) spin_trylock(&(xa)->xa_lock) 114#define xa_trylock(xa) spin_trylock(&(xa)->xa_lock)
13#define xa_lock(xa) spin_lock(&(xa)->xa_lock) 115#define xa_lock(xa) spin_lock(&(xa)->xa_lock)
diff --git a/lib/idr.c b/lib/idr.c
index 729e381e23b4..88419fbc5737 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -338,11 +338,8 @@ EXPORT_SYMBOL(idr_replace);
338 * by the number of bits in the leaf bitmap before doing a radix tree lookup. 338 * by the number of bits in the leaf bitmap before doing a radix tree lookup.
339 * 339 *
340 * As an optimisation, if there are only a few low bits set in any given 340 * As an optimisation, if there are only a few low bits set in any given
341 * leaf, instead of allocating a 128-byte bitmap, we use the 'exceptional 341 * leaf, instead of allocating a 128-byte bitmap, we store the bits
342 * entry' functionality of the radix tree to store BITS_PER_LONG - 2 bits 342 * directly in the entry.
343 * directly in the entry. By being really tricksy, we could store
344 * BITS_PER_LONG - 1 bits, but there're diminishing returns after optimising
345 * for 0-3 allocated IDs.
346 * 343 *
347 * We allow the radix tree 'exceptional' count to get out of date. Nothing 344 * We allow the radix tree 'exceptional' count to get out of date. Nothing
348 * in the IDA nor the radix tree code checks it. If it becomes important 345 * in the IDA nor the radix tree code checks it. If it becomes important
@@ -366,12 +363,11 @@ static int ida_get_new_above(struct ida *ida, int start)
366 struct radix_tree_iter iter; 363 struct radix_tree_iter iter;
367 struct ida_bitmap *bitmap; 364 struct ida_bitmap *bitmap;
368 unsigned long index; 365 unsigned long index;
369 unsigned bit, ebit; 366 unsigned bit;
370 int new; 367 int new;
371 368
372 index = start / IDA_BITMAP_BITS; 369 index = start / IDA_BITMAP_BITS;
373 bit = start % IDA_BITMAP_BITS; 370 bit = start % IDA_BITMAP_BITS;
374 ebit = bit + RADIX_TREE_EXCEPTIONAL_SHIFT;
375 371
376 slot = radix_tree_iter_init(&iter, index); 372 slot = radix_tree_iter_init(&iter, index);
377 for (;;) { 373 for (;;) {
@@ -386,25 +382,23 @@ static int ida_get_new_above(struct ida *ida, int start)
386 return PTR_ERR(slot); 382 return PTR_ERR(slot);
387 } 383 }
388 } 384 }
389 if (iter.index > index) { 385 if (iter.index > index)
390 bit = 0; 386 bit = 0;
391 ebit = RADIX_TREE_EXCEPTIONAL_SHIFT;
392 }
393 new = iter.index * IDA_BITMAP_BITS; 387 new = iter.index * IDA_BITMAP_BITS;
394 bitmap = rcu_dereference_raw(*slot); 388 bitmap = rcu_dereference_raw(*slot);
395 if (radix_tree_exception(bitmap)) { 389 if (xa_is_value(bitmap)) {
396 unsigned long tmp = (unsigned long)bitmap; 390 unsigned long tmp = xa_to_value(bitmap);
397 ebit = find_next_zero_bit(&tmp, BITS_PER_LONG, ebit); 391 int vbit = find_next_zero_bit(&tmp, BITS_PER_XA_VALUE,
398 if (ebit < BITS_PER_LONG) { 392 bit);
399 tmp |= 1UL << ebit; 393 if (vbit < BITS_PER_XA_VALUE) {
400 rcu_assign_pointer(*slot, (void *)tmp); 394 tmp |= 1UL << vbit;
401 return new + ebit - 395 rcu_assign_pointer(*slot, xa_mk_value(tmp));
402 RADIX_TREE_EXCEPTIONAL_SHIFT; 396 return new + vbit;
403 } 397 }
404 bitmap = this_cpu_xchg(ida_bitmap, NULL); 398 bitmap = this_cpu_xchg(ida_bitmap, NULL);
405 if (!bitmap) 399 if (!bitmap)
406 return -EAGAIN; 400 return -EAGAIN;
407 bitmap->bitmap[0] = tmp >> RADIX_TREE_EXCEPTIONAL_SHIFT; 401 bitmap->bitmap[0] = tmp;
408 rcu_assign_pointer(*slot, bitmap); 402 rcu_assign_pointer(*slot, bitmap);
409 } 403 }
410 404
@@ -425,17 +419,14 @@ static int ida_get_new_above(struct ida *ida, int start)
425 new += bit; 419 new += bit;
426 if (new < 0) 420 if (new < 0)
427 return -ENOSPC; 421 return -ENOSPC;
428 if (ebit < BITS_PER_LONG) { 422 if (bit < BITS_PER_XA_VALUE) {
429 bitmap = (void *)((1UL << ebit) | 423 bitmap = xa_mk_value(1UL << bit);
430 RADIX_TREE_EXCEPTIONAL_ENTRY); 424 } else {
431 radix_tree_iter_replace(root, &iter, slot, 425 bitmap = this_cpu_xchg(ida_bitmap, NULL);
432 bitmap); 426 if (!bitmap)
433 return new; 427 return -EAGAIN;
428 __set_bit(bit, bitmap->bitmap);
434 } 429 }
435 bitmap = this_cpu_xchg(ida_bitmap, NULL);
436 if (!bitmap)
437 return -EAGAIN;
438 __set_bit(bit, bitmap->bitmap);
439 radix_tree_iter_replace(root, &iter, slot, bitmap); 430 radix_tree_iter_replace(root, &iter, slot, bitmap);
440 } 431 }
441 432
@@ -457,9 +448,9 @@ static void ida_remove(struct ida *ida, int id)
457 goto err; 448 goto err;
458 449
459 bitmap = rcu_dereference_raw(*slot); 450 bitmap = rcu_dereference_raw(*slot);
460 if (radix_tree_exception(bitmap)) { 451 if (xa_is_value(bitmap)) {
461 btmp = (unsigned long *)slot; 452 btmp = (unsigned long *)slot;
462 offset += RADIX_TREE_EXCEPTIONAL_SHIFT; 453 offset += 1; /* Intimate knowledge of the value encoding */
463 if (offset >= BITS_PER_LONG) 454 if (offset >= BITS_PER_LONG)
464 goto err; 455 goto err;
465 } else { 456 } else {
@@ -470,9 +461,8 @@ static void ida_remove(struct ida *ida, int id)
470 461
471 __clear_bit(offset, btmp); 462 __clear_bit(offset, btmp);
472 radix_tree_iter_tag_set(&ida->ida_rt, &iter, IDR_FREE); 463 radix_tree_iter_tag_set(&ida->ida_rt, &iter, IDR_FREE);
473 if (radix_tree_exception(bitmap)) { 464 if (xa_is_value(bitmap)) {
474 if (rcu_dereference_raw(*slot) == 465 if (xa_to_value(rcu_dereference_raw(*slot)) == 0)
475 (void *)RADIX_TREE_EXCEPTIONAL_ENTRY)
476 radix_tree_iter_delete(&ida->ida_rt, &iter, slot); 466 radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
477 } else if (bitmap_empty(btmp, IDA_BITMAP_BITS)) { 467 } else if (bitmap_empty(btmp, IDA_BITMAP_BITS)) {
478 kfree(bitmap); 468 kfree(bitmap);
@@ -503,7 +493,7 @@ void ida_destroy(struct ida *ida)
503 xa_lock_irqsave(&ida->ida_rt, flags); 493 xa_lock_irqsave(&ida->ida_rt, flags);
504 radix_tree_for_each_slot(slot, &ida->ida_rt, &iter, 0) { 494 radix_tree_for_each_slot(slot, &ida->ida_rt, &iter, 0) {
505 struct ida_bitmap *bitmap = rcu_dereference_raw(*slot); 495 struct ida_bitmap *bitmap = rcu_dereference_raw(*slot);
506 if (!radix_tree_exception(bitmap)) 496 if (!xa_is_value(bitmap))
507 kfree(bitmap); 497 kfree(bitmap);
508 radix_tree_iter_delete(&ida->ida_rt, &iter, slot); 498 radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
509 } 499 }
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index a904a8ddd174..b6c0e7f3a894 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -340,14 +340,12 @@ static void dump_ida_node(void *entry, unsigned long index)
340 for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) 340 for (i = 0; i < RADIX_TREE_MAP_SIZE; i++)
341 dump_ida_node(node->slots[i], 341 dump_ida_node(node->slots[i],
342 index | (i << node->shift)); 342 index | (i << node->shift));
343 } else if (radix_tree_exceptional_entry(entry)) { 343 } else if (xa_is_value(entry)) {
344 pr_debug("ida excp: %p offset %d indices %lu-%lu data %lx\n", 344 pr_debug("ida excp: %p offset %d indices %lu-%lu data %lx\n",
345 entry, (int)(index & RADIX_TREE_MAP_MASK), 345 entry, (int)(index & RADIX_TREE_MAP_MASK),
346 index * IDA_BITMAP_BITS, 346 index * IDA_BITMAP_BITS,
347 index * IDA_BITMAP_BITS + BITS_PER_LONG - 347 index * IDA_BITMAP_BITS + BITS_PER_XA_VALUE,
348 RADIX_TREE_EXCEPTIONAL_SHIFT, 348 xa_to_value(entry));
349 (unsigned long)entry >>
350 RADIX_TREE_EXCEPTIONAL_SHIFT);
351 } else { 349 } else {
352 struct ida_bitmap *bitmap = entry; 350 struct ida_bitmap *bitmap = entry;
353 351
@@ -656,7 +654,7 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
656 BUG_ON(shift > BITS_PER_LONG); 654 BUG_ON(shift > BITS_PER_LONG);
657 if (radix_tree_is_internal_node(entry)) { 655 if (radix_tree_is_internal_node(entry)) {
658 entry_to_node(entry)->parent = node; 656 entry_to_node(entry)->parent = node;
659 } else if (radix_tree_exceptional_entry(entry)) { 657 } else if (xa_is_value(entry)) {
660 /* Moving an exceptional root->rnode to a node */ 658 /* Moving an exceptional root->rnode to a node */
661 node->exceptional = 1; 659 node->exceptional = 1;
662 } 660 }
@@ -955,12 +953,12 @@ static inline int insert_entries(struct radix_tree_node *node,
955 !is_sibling_entry(node, old) && 953 !is_sibling_entry(node, old) &&
956 (old != RADIX_TREE_RETRY)) 954 (old != RADIX_TREE_RETRY))
957 radix_tree_free_nodes(old); 955 radix_tree_free_nodes(old);
958 if (radix_tree_exceptional_entry(old)) 956 if (xa_is_value(old))
959 node->exceptional--; 957 node->exceptional--;
960 } 958 }
961 if (node) { 959 if (node) {
962 node->count += n; 960 node->count += n;
963 if (radix_tree_exceptional_entry(item)) 961 if (xa_is_value(item))
964 node->exceptional += n; 962 node->exceptional += n;
965 } 963 }
966 return n; 964 return n;
@@ -974,7 +972,7 @@ static inline int insert_entries(struct radix_tree_node *node,
974 rcu_assign_pointer(*slot, item); 972 rcu_assign_pointer(*slot, item);
975 if (node) { 973 if (node) {
976 node->count++; 974 node->count++;
977 if (radix_tree_exceptional_entry(item)) 975 if (xa_is_value(item))
978 node->exceptional++; 976 node->exceptional++;
979 } 977 }
980 return 1; 978 return 1;
@@ -1190,8 +1188,7 @@ void __radix_tree_replace(struct radix_tree_root *root,
1190 radix_tree_update_node_t update_node) 1188 radix_tree_update_node_t update_node)
1191{ 1189{
1192 void *old = rcu_dereference_raw(*slot); 1190 void *old = rcu_dereference_raw(*slot);
1193 int exceptional = !!radix_tree_exceptional_entry(item) - 1191 int exceptional = !!xa_is_value(item) - !!xa_is_value(old);
1194 !!radix_tree_exceptional_entry(old);
1195 int count = calculate_count(root, node, slot, item, old); 1192 int count = calculate_count(root, node, slot, item, old);
1196 1193
1197 /* 1194 /*
@@ -1992,7 +1989,7 @@ static bool __radix_tree_delete(struct radix_tree_root *root,
1992 struct radix_tree_node *node, void __rcu **slot) 1989 struct radix_tree_node *node, void __rcu **slot)
1993{ 1990{
1994 void *old = rcu_dereference_raw(*slot); 1991 void *old = rcu_dereference_raw(*slot);
1995 int exceptional = radix_tree_exceptional_entry(old) ? -1 : 0; 1992 int exceptional = xa_is_value(old) ? -1 : 0;
1996 unsigned offset = get_slot_offset(node, slot); 1993 unsigned offset = get_slot_offset(node, slot);
1997 int tag; 1994 int tag;
1998 1995
diff --git a/mm/filemap.c b/mm/filemap.c
index 52517f28e6f4..4de14e75c4ec 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -127,7 +127,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
127 127
128 p = radix_tree_deref_slot_protected(slot, 128 p = radix_tree_deref_slot_protected(slot,
129 &mapping->i_pages.xa_lock); 129 &mapping->i_pages.xa_lock);
130 if (!radix_tree_exceptional_entry(p)) 130 if (!xa_is_value(p))
131 return -EEXIST; 131 return -EEXIST;
132 132
133 mapping->nrexceptional--; 133 mapping->nrexceptional--;
@@ -336,7 +336,7 @@ page_cache_tree_delete_batch(struct address_space *mapping,
336 break; 336 break;
337 page = radix_tree_deref_slot_protected(slot, 337 page = radix_tree_deref_slot_protected(slot,
338 &mapping->i_pages.xa_lock); 338 &mapping->i_pages.xa_lock);
339 if (radix_tree_exceptional_entry(page)) 339 if (xa_is_value(page))
340 continue; 340 continue;
341 if (!tail_pages) { 341 if (!tail_pages) {
342 /* 342 /*
@@ -1355,7 +1355,7 @@ pgoff_t page_cache_next_hole(struct address_space *mapping,
1355 struct page *page; 1355 struct page *page;
1356 1356
1357 page = radix_tree_lookup(&mapping->i_pages, index); 1357 page = radix_tree_lookup(&mapping->i_pages, index);
1358 if (!page || radix_tree_exceptional_entry(page)) 1358 if (!page || xa_is_value(page))
1359 break; 1359 break;
1360 index++; 1360 index++;
1361 if (index == 0) 1361 if (index == 0)
@@ -1396,7 +1396,7 @@ pgoff_t page_cache_prev_hole(struct address_space *mapping,
1396 struct page *page; 1396 struct page *page;
1397 1397
1398 page = radix_tree_lookup(&mapping->i_pages, index); 1398 page = radix_tree_lookup(&mapping->i_pages, index);
1399 if (!page || radix_tree_exceptional_entry(page)) 1399 if (!page || xa_is_value(page))
1400 break; 1400 break;
1401 index--; 1401 index--;
1402 if (index == ULONG_MAX) 1402 if (index == ULONG_MAX)
@@ -1539,7 +1539,7 @@ struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
1539 1539
1540repeat: 1540repeat:
1541 page = find_get_entry(mapping, offset); 1541 page = find_get_entry(mapping, offset);
1542 if (radix_tree_exceptional_entry(page)) 1542 if (xa_is_value(page))
1543 page = NULL; 1543 page = NULL;
1544 if (!page) 1544 if (!page)
1545 goto no_page; 1545 goto no_page;
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index a31d740e6cd1..4820e4adf853 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1369,7 +1369,7 @@ static void collapse_shmem(struct mm_struct *mm,
1369 1369
1370 page = radix_tree_deref_slot_protected(slot, 1370 page = radix_tree_deref_slot_protected(slot,
1371 &mapping->i_pages.xa_lock); 1371 &mapping->i_pages.xa_lock);
1372 if (radix_tree_exceptional_entry(page) || !PageUptodate(page)) { 1372 if (xa_is_value(page) || !PageUptodate(page)) {
1373 xa_unlock_irq(&mapping->i_pages); 1373 xa_unlock_irq(&mapping->i_pages);
1374 /* swap in or instantiate fallocated page */ 1374 /* swap in or instantiate fallocated page */
1375 if (shmem_getpage(mapping->host, index, &page, 1375 if (shmem_getpage(mapping->host, index, &page,
diff --git a/mm/madvise.c b/mm/madvise.c
index 972a9eaa898b..9d802566c494 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -251,7 +251,7 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma,
251 index = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; 251 index = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
252 252
253 page = find_get_entry(mapping, index); 253 page = find_get_entry(mapping, index);
254 if (!radix_tree_exceptional_entry(page)) { 254 if (!xa_is_value(page)) {
255 if (page) 255 if (page)
256 put_page(page); 256 put_page(page);
257 continue; 257 continue;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e79cb59552d9..29d9d1a69b36 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4750,7 +4750,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
4750 /* shmem/tmpfs may report page out on swap: account for that too. */ 4750 /* shmem/tmpfs may report page out on swap: account for that too. */
4751 if (shmem_mapping(mapping)) { 4751 if (shmem_mapping(mapping)) {
4752 page = find_get_entry(mapping, pgoff); 4752 page = find_get_entry(mapping, pgoff);
4753 if (radix_tree_exceptional_entry(page)) { 4753 if (xa_is_value(page)) {
4754 swp_entry_t swp = radix_to_swp_entry(page); 4754 swp_entry_t swp = radix_to_swp_entry(page);
4755 if (do_memsw_account()) 4755 if (do_memsw_account())
4756 *entry = swp; 4756 *entry = swp;
diff --git a/mm/mincore.c b/mm/mincore.c
index fc37afe226e6..4985965aa20a 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -66,7 +66,7 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
66 * shmem/tmpfs may return swap: account for swapcache 66 * shmem/tmpfs may return swap: account for swapcache
67 * page too. 67 * page too.
68 */ 68 */
69 if (radix_tree_exceptional_entry(page)) { 69 if (xa_is_value(page)) {
70 swp_entry_t swp = radix_to_swp_entry(page); 70 swp_entry_t swp = radix_to_swp_entry(page);
71 page = find_get_page(swap_address_space(swp), 71 page = find_get_page(swap_address_space(swp),
72 swp_offset(swp)); 72 swp_offset(swp));
diff --git a/mm/readahead.c b/mm/readahead.c
index 4e630143a0ba..de657077d41d 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -179,7 +179,7 @@ unsigned int __do_page_cache_readahead(struct address_space *mapping,
179 rcu_read_lock(); 179 rcu_read_lock();
180 page = radix_tree_lookup(&mapping->i_pages, page_offset); 180 page = radix_tree_lookup(&mapping->i_pages, page_offset);
181 rcu_read_unlock(); 181 rcu_read_unlock();
182 if (page && !radix_tree_exceptional_entry(page)) { 182 if (page && !xa_is_value(page)) {
183 /* 183 /*
184 * Page already present? Kick off the current batch of 184 * Page already present? Kick off the current batch of
185 * contiguous pages before continuing with the next 185 * contiguous pages before continuing with the next
diff --git a/mm/shmem.c b/mm/shmem.c
index 446942677cd4..c1062760fe41 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -709,7 +709,7 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping,
709 continue; 709 continue;
710 } 710 }
711 711
712 if (radix_tree_exceptional_entry(page)) 712 if (xa_is_value(page))
713 swapped++; 713 swapped++;
714 714
715 if (need_resched()) { 715 if (need_resched()) {
@@ -824,7 +824,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
824 if (index >= end) 824 if (index >= end)
825 break; 825 break;
826 826
827 if (radix_tree_exceptional_entry(page)) { 827 if (xa_is_value(page)) {
828 if (unfalloc) 828 if (unfalloc)
829 continue; 829 continue;
830 nr_swaps_freed += !shmem_free_swap(mapping, 830 nr_swaps_freed += !shmem_free_swap(mapping,
@@ -921,7 +921,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
921 if (index >= end) 921 if (index >= end)
922 break; 922 break;
923 923
924 if (radix_tree_exceptional_entry(page)) { 924 if (xa_is_value(page)) {
925 if (unfalloc) 925 if (unfalloc)
926 continue; 926 continue;
927 if (shmem_free_swap(mapping, index, page)) { 927 if (shmem_free_swap(mapping, index, page)) {
@@ -1643,7 +1643,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
1643repeat: 1643repeat:
1644 swap.val = 0; 1644 swap.val = 0;
1645 page = find_lock_entry(mapping, index); 1645 page = find_lock_entry(mapping, index);
1646 if (radix_tree_exceptional_entry(page)) { 1646 if (xa_is_value(page)) {
1647 swap = radix_to_swp_entry(page); 1647 swap = radix_to_swp_entry(page);
1648 page = NULL; 1648 page = NULL;
1649 } 1649 }
@@ -2578,7 +2578,7 @@ static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
2578 index = indices[i]; 2578 index = indices[i];
2579 } 2579 }
2580 page = pvec.pages[i]; 2580 page = pvec.pages[i];
2581 if (page && !radix_tree_exceptional_entry(page)) { 2581 if (page && !xa_is_value(page)) {
2582 if (!PageUptodate(page)) 2582 if (!PageUptodate(page))
2583 page = NULL; 2583 page = NULL;
2584 } 2584 }
diff --git a/mm/swap.c b/mm/swap.c
index 26fc9b5f1b6c..4c5c7fcc6e46 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -965,7 +965,7 @@ void pagevec_remove_exceptionals(struct pagevec *pvec)
965 965
966 for (i = 0, j = 0; i < pagevec_count(pvec); i++) { 966 for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
967 struct page *page = pvec->pages[i]; 967 struct page *page = pvec->pages[i];
968 if (!radix_tree_exceptional_entry(page)) 968 if (!xa_is_value(page))
969 pvec->pages[j++] = page; 969 pvec->pages[j++] = page;
970 } 970 }
971 pvec->nr = j; 971 pvec->nr = j;
diff --git a/mm/truncate.c b/mm/truncate.c
index 1d2fb2dca96f..ed778555c9f3 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -70,7 +70,7 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping,
70 return; 70 return;
71 71
72 for (j = 0; j < pagevec_count(pvec); j++) 72 for (j = 0; j < pagevec_count(pvec); j++)
73 if (radix_tree_exceptional_entry(pvec->pages[j])) 73 if (xa_is_value(pvec->pages[j]))
74 break; 74 break;
75 75
76 if (j == pagevec_count(pvec)) 76 if (j == pagevec_count(pvec))
@@ -85,7 +85,7 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping,
85 struct page *page = pvec->pages[i]; 85 struct page *page = pvec->pages[i];
86 pgoff_t index = indices[i]; 86 pgoff_t index = indices[i];
87 87
88 if (!radix_tree_exceptional_entry(page)) { 88 if (!xa_is_value(page)) {
89 pvec->pages[j++] = page; 89 pvec->pages[j++] = page;
90 continue; 90 continue;
91 } 91 }
@@ -347,7 +347,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
347 if (index >= end) 347 if (index >= end)
348 break; 348 break;
349 349
350 if (radix_tree_exceptional_entry(page)) 350 if (xa_is_value(page))
351 continue; 351 continue;
352 352
353 if (!trylock_page(page)) 353 if (!trylock_page(page))
@@ -442,7 +442,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
442 break; 442 break;
443 } 443 }
444 444
445 if (radix_tree_exceptional_entry(page)) 445 if (xa_is_value(page))
446 continue; 446 continue;
447 447
448 lock_page(page); 448 lock_page(page);
@@ -561,7 +561,7 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
561 if (index > end) 561 if (index > end)
562 break; 562 break;
563 563
564 if (radix_tree_exceptional_entry(page)) { 564 if (xa_is_value(page)) {
565 invalidate_exceptional_entry(mapping, index, 565 invalidate_exceptional_entry(mapping, index,
566 page); 566 page);
567 continue; 567 continue;
@@ -692,7 +692,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
692 if (index > end) 692 if (index > end)
693 break; 693 break;
694 694
695 if (radix_tree_exceptional_entry(page)) { 695 if (xa_is_value(page)) {
696 if (!invalidate_exceptional_entry2(mapping, 696 if (!invalidate_exceptional_entry2(mapping,
697 index, page)) 697 index, page))
698 ret = -EBUSY; 698 ret = -EBUSY;
diff --git a/mm/workingset.c b/mm/workingset.c
index 4516dd790129..bb109b3afac2 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -155,8 +155,8 @@
155 * refault distance will immediately activate the refaulting page. 155 * refault distance will immediately activate the refaulting page.
156 */ 156 */
157 157
158#define EVICTION_SHIFT (RADIX_TREE_EXCEPTIONAL_ENTRY + \ 158#define EVICTION_SHIFT ((BITS_PER_LONG - BITS_PER_XA_VALUE) + \
159 NODES_SHIFT + \ 159 NODES_SHIFT + \
160 MEM_CGROUP_ID_SHIFT) 160 MEM_CGROUP_ID_SHIFT)
161#define EVICTION_MASK (~0UL >> EVICTION_SHIFT) 161#define EVICTION_MASK (~0UL >> EVICTION_SHIFT)
162 162
@@ -173,20 +173,19 @@ static unsigned int bucket_order __read_mostly;
173static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction) 173static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction)
174{ 174{
175 eviction >>= bucket_order; 175 eviction >>= bucket_order;
176 eviction &= EVICTION_MASK;
176 eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid; 177 eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid;
177 eviction = (eviction << NODES_SHIFT) | pgdat->node_id; 178 eviction = (eviction << NODES_SHIFT) | pgdat->node_id;
178 eviction = (eviction << RADIX_TREE_EXCEPTIONAL_SHIFT);
179 179
180 return (void *)(eviction | RADIX_TREE_EXCEPTIONAL_ENTRY); 180 return xa_mk_value(eviction);
181} 181}
182 182
183static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat, 183static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
184 unsigned long *evictionp) 184 unsigned long *evictionp)
185{ 185{
186 unsigned long entry = (unsigned long)shadow; 186 unsigned long entry = xa_to_value(shadow);
187 int memcgid, nid; 187 int memcgid, nid;
188 188
189 entry >>= RADIX_TREE_EXCEPTIONAL_SHIFT;
190 nid = entry & ((1UL << NODES_SHIFT) - 1); 189 nid = entry & ((1UL << NODES_SHIFT) - 1);
191 entry >>= NODES_SHIFT; 190 entry >>= NODES_SHIFT;
192 memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1); 191 memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1);
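
pack_shadow() now masks the eviction counter against EVICTION_MASK up front, shifts in the memcg id and node id, and lets xa_mk_value() supply the tag bit that the old code produced by shifting by RADIX_TREE_EXCEPTIONAL_SHIFT and OR-ing in RADIX_TREE_EXCEPTIONAL_ENTRY; unpack_shadow() can therefore start peeling fields off straight after xa_to_value(). A standalone round-trip sketch of that packing, assuming illustrative field widths (NODES_SHIFT and MEM_CGROUP_ID_SHIFT are config-dependent in the kernel), leaving out the bucket_order scaling, and reusing the simplified xa_* stand-ins:

    #include <assert.h>
    #include <stdio.h>

    /* Illustrative widths only; the kernel derives these from the config. */
    #define BITS_PER_LONG           (sizeof(unsigned long) * 8)
    #define BITS_PER_XA_VALUE       (BITS_PER_LONG - 1)
    #define NODES_SHIFT             4
    #define MEM_CGROUP_ID_SHIFT     16
    #define EVICTION_SHIFT          ((BITS_PER_LONG - BITS_PER_XA_VALUE) + \
                                     NODES_SHIFT + MEM_CGROUP_ID_SHIFT)
    #define EVICTION_MASK           (~0UL >> EVICTION_SHIFT)

    static void *xa_mk_value(unsigned long v) { return (void *)((v << 1) | 1); }
    static unsigned long xa_to_value(const void *e) { return (unsigned long)e >> 1; }

    static void *pack_shadow(int memcgid, int nid, unsigned long eviction)
    {
            eviction &= EVICTION_MASK;
            eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid;
            eviction = (eviction << NODES_SHIFT) | nid;
            return xa_mk_value(eviction);
    }

    static void unpack_shadow(void *shadow, int *memcgid, int *nid,
                              unsigned long *eviction)
    {
            unsigned long entry = xa_to_value(shadow);

            *nid = entry & ((1UL << NODES_SHIFT) - 1);
            entry >>= NODES_SHIFT;
            *memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1);
            entry >>= MEM_CGROUP_ID_SHIFT;
            *eviction = entry;
    }

    int main(void)
    {
            int memcgid, nid;
            unsigned long eviction;

            unpack_shadow(pack_shadow(7, 3, 1234), &memcgid, &nid, &eviction);
            assert(memcgid == 7 && nid == 3 && eviction == 1234);
            printf("shadow entry round trip ok\n");
            return 0;
    }
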
@@ -453,7 +452,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
453 goto out_invalid; 452 goto out_invalid;
454 for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { 453 for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
455 if (node->slots[i]) { 454 if (node->slots[i]) {
456 if (WARN_ON_ONCE(!radix_tree_exceptional_entry(node->slots[i]))) 455 if (WARN_ON_ONCE(!xa_is_value(node->slots[i])))
457 goto out_invalid; 456 goto out_invalid;
458 if (WARN_ON_ONCE(!node->exceptional)) 457 if (WARN_ON_ONCE(!node->exceptional))
459 goto out_invalid; 458 goto out_invalid;
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index f620c831a4b5..a5a4494922a6 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -19,7 +19,7 @@
19 19
20#include "test.h" 20#include "test.h"
21 21
22#define DUMMY_PTR ((void *)0x12) 22#define DUMMY_PTR ((void *)0x10)
23 23
24int item_idr_free(int id, void *p, void *data) 24int item_idr_free(int id, void *p, void *data)
25{ 25{
@@ -411,11 +411,11 @@ void ida_check_conv_user(void)
411 int id = ida_alloc(&ida, GFP_NOWAIT); 411 int id = ida_alloc(&ida, GFP_NOWAIT);
412 if (id == -ENOMEM) { 412 if (id == -ENOMEM) {
413 IDA_BUG_ON(&ida, (i % IDA_BITMAP_BITS) != 413 IDA_BUG_ON(&ida, (i % IDA_BITMAP_BITS) !=
414 BITS_PER_LONG - 2); 414 BITS_PER_XA_VALUE);
415 id = ida_alloc(&ida, GFP_KERNEL); 415 id = ida_alloc(&ida, GFP_KERNEL);
416 } else { 416 } else {
417 IDA_BUG_ON(&ida, (i % IDA_BITMAP_BITS) == 417 IDA_BUG_ON(&ida, (i % IDA_BITMAP_BITS) ==
418 BITS_PER_LONG - 2); 418 BITS_PER_XA_VALUE);
419 } 419 }
420 IDA_BUG_ON(&ida, id != i); 420 IDA_BUG_ON(&ida, id != i);
421 } 421 }
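
The IDA conversion test now bounds itself at BITS_PER_XA_VALUE, i.e. BITS_PER_LONG - 1, since one bit of every value entry is spent on the tag, and DUMMY_PTR moves from 0x12 to 0x10 so the dummy keeps both low bits clear; reading the low bits of 0x12 as reserved tag space is my interpretation of that change rather than something the patch spells out. A small sketch of both constraints, with the same simplified stand-ins:

    #include <assert.h>
    #include <limits.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define BITS_PER_LONG           (CHAR_BIT * sizeof(unsigned long))
    #define BITS_PER_XA_VALUE       (BITS_PER_LONG - 1)   /* one bit pays for the tag */

    static void *xa_mk_value(unsigned long v) { return (void *)((v << 1) | 1); }
    static unsigned long xa_to_value(const void *e) { return (unsigned long)e >> 1; }
    static bool xa_is_value(const void *e) { return (unsigned long)e & 1; }

    int main(void)
    {
            /* Largest integer that still round-trips through a value entry. */
            unsigned long max = (1UL << BITS_PER_XA_VALUE) - 1;

            assert(xa_to_value(xa_mk_value(max)) == max);

            /*
             * 0x10 has both low bits clear, so it always reads as a plain
             * aligned pointer; 0x12 has bit 1 set, which the new scheme can
             * interpret as tag space (assumption, see lead-in above).
             */
            assert(!xa_is_value((void *)0x10));
            printf("max value entry payload: %lu\n", max);
            return 0;
    }
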
diff --git a/tools/testing/radix-tree/linux/radix-tree.h b/tools/testing/radix-tree/linux/radix-tree.h
index 24f13d27a8da..d1635a5bef02 100644
--- a/tools/testing/radix-tree/linux/radix-tree.h
+++ b/tools/testing/radix-tree/linux/radix-tree.h
@@ -2,7 +2,6 @@
2#ifndef _TEST_RADIX_TREE_H 2#ifndef _TEST_RADIX_TREE_H
3#define _TEST_RADIX_TREE_H 3#define _TEST_RADIX_TREE_H
4 4
5#include "generated/map-shift.h"
6#include "../../../../include/linux/radix-tree.h" 5#include "../../../../include/linux/radix-tree.h"
7 6
8extern int kmalloc_verbose; 7extern int kmalloc_verbose;
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index 7bf405638b0b..2b4f4dba1882 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -39,12 +39,11 @@ static void __multiorder_tag_test(int index, int order)
39 39
40 /* 40 /*
41 * Verify we get collisions for covered indices. We try and fail to 41 * Verify we get collisions for covered indices. We try and fail to
42 * insert an exceptional entry so we don't leak memory via 42 * insert a value entry so we don't leak memory via
43 * item_insert_order(). 43 * item_insert_order().
44 */ 44 */
45 for_each_index(i, base, order) { 45 for_each_index(i, base, order) {
46 err = __radix_tree_insert(&tree, i, order, 46 err = __radix_tree_insert(&tree, i, order, xa_mk_value(0xA0));
47 (void *)(0xA0 | RADIX_TREE_EXCEPTIONAL_ENTRY));
48 assert(err == -EEXIST); 47 assert(err == -EEXIST);
49 } 48 }
50 49
@@ -380,8 +379,8 @@ static void multiorder_join1(unsigned long index,
380} 379}
381 380
382/* 381/*
383 * Check that the accounting of exceptional entries is handled correctly 382 * Check that the accounting of value entries is handled correctly
384 * by joining an exceptional entry to a normal pointer. 383 * by joining a value entry to a normal pointer.
385 */ 384 */
386static void multiorder_join2(unsigned order1, unsigned order2) 385static void multiorder_join2(unsigned order1, unsigned order2)
387{ 386{
@@ -391,9 +390,9 @@ static void multiorder_join2(unsigned order1, unsigned order2)
391 void *item2; 390 void *item2;
392 391
393 item_insert_order(&tree, 0, order2); 392 item_insert_order(&tree, 0, order2);
394 radix_tree_insert(&tree, 1 << order2, (void *)0x12UL); 393 radix_tree_insert(&tree, 1 << order2, xa_mk_value(5));
395 item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL); 394 item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL);
396 assert(item2 == (void *)0x12UL); 395 assert(item2 == xa_mk_value(5));
397 assert(node->exceptional == 1); 396 assert(node->exceptional == 1);
398 397
399 item2 = radix_tree_lookup(&tree, 0); 398 item2 = radix_tree_lookup(&tree, 0);
@@ -407,7 +406,7 @@ static void multiorder_join2(unsigned order1, unsigned order2)
407} 406}
408 407
409/* 408/*
410 * This test revealed an accounting bug for exceptional entries at one point. 409 * This test revealed an accounting bug for value entries at one point.
411 * Nodes were being freed back into the pool with an elevated exception count 410 * Nodes were being freed back into the pool with an elevated exception count
412 * by radix_tree_join() and then radix_tree_split() was failing to zero the 411 * by radix_tree_join() and then radix_tree_split() was failing to zero the
413 * count of exceptional entries. 412 * count of exceptional entries.
@@ -421,16 +420,16 @@ static void multiorder_join3(unsigned int order)
421 unsigned long i; 420 unsigned long i;
422 421
423 for (i = 0; i < (1 << order); i++) { 422 for (i = 0; i < (1 << order); i++) {
424 radix_tree_insert(&tree, i, (void *)0x12UL); 423 radix_tree_insert(&tree, i, xa_mk_value(5));
425 } 424 }
426 425
427 radix_tree_join(&tree, 0, order, (void *)0x16UL); 426 radix_tree_join(&tree, 0, order, xa_mk_value(7));
428 rcu_barrier(); 427 rcu_barrier();
429 428
430 radix_tree_split(&tree, 0, 0); 429 radix_tree_split(&tree, 0, 0);
431 430
432 radix_tree_for_each_slot(slot, &tree, &iter, 0) { 431 radix_tree_for_each_slot(slot, &tree, &iter, 0) {
433 radix_tree_iter_replace(&tree, &iter, slot, (void *)0x12UL); 432 radix_tree_iter_replace(&tree, &iter, slot, xa_mk_value(5));
434 } 433 }
435 434
436 __radix_tree_lookup(&tree, 0, &node, NULL); 435 __radix_tree_lookup(&tree, 0, &node, NULL);
@@ -517,10 +516,10 @@ static void __multiorder_split2(int old_order, int new_order)
517 struct radix_tree_node *node; 516 struct radix_tree_node *node;
518 void *item; 517 void *item;
519 518
520 __radix_tree_insert(&tree, 0, old_order, (void *)0x12); 519 __radix_tree_insert(&tree, 0, old_order, xa_mk_value(5));
521 520
522 item = __radix_tree_lookup(&tree, 0, &node, NULL); 521 item = __radix_tree_lookup(&tree, 0, &node, NULL);
523 assert(item == (void *)0x12); 522 assert(item == xa_mk_value(5));
524 assert(node->exceptional > 0); 523 assert(node->exceptional > 0);
525 524
526 radix_tree_split(&tree, 0, new_order); 525 radix_tree_split(&tree, 0, new_order);
@@ -530,7 +529,7 @@ static void __multiorder_split2(int old_order, int new_order)
530 } 529 }
531 530
532 item = __radix_tree_lookup(&tree, 0, &node, NULL); 531 item = __radix_tree_lookup(&tree, 0, &node, NULL);
533 assert(item != (void *)0x12); 532 assert(item != xa_mk_value(5));
534 assert(node->exceptional == 0); 533 assert(node->exceptional == 0);
535 534
536 item_kill_tree(&tree); 535 item_kill_tree(&tree);
@@ -544,40 +543,40 @@ static void __multiorder_split3(int old_order, int new_order)
544 struct radix_tree_node *node; 543 struct radix_tree_node *node;
545 void *item; 544 void *item;
546 545
547 __radix_tree_insert(&tree, 0, old_order, (void *)0x12); 546 __radix_tree_insert(&tree, 0, old_order, xa_mk_value(5));
548 547
549 item = __radix_tree_lookup(&tree, 0, &node, NULL); 548 item = __radix_tree_lookup(&tree, 0, &node, NULL);
550 assert(item == (void *)0x12); 549 assert(item == xa_mk_value(5));
551 assert(node->exceptional > 0); 550 assert(node->exceptional > 0);
552 551
553 radix_tree_split(&tree, 0, new_order); 552 radix_tree_split(&tree, 0, new_order);
554 radix_tree_for_each_slot(slot, &tree, &iter, 0) { 553 radix_tree_for_each_slot(slot, &tree, &iter, 0) {
555 radix_tree_iter_replace(&tree, &iter, slot, (void *)0x16); 554 radix_tree_iter_replace(&tree, &iter, slot, xa_mk_value(7));
556 } 555 }
557 556
558 item = __radix_tree_lookup(&tree, 0, &node, NULL); 557 item = __radix_tree_lookup(&tree, 0, &node, NULL);
559 assert(item == (void *)0x16); 558 assert(item == xa_mk_value(7));
560 assert(node->exceptional > 0); 559 assert(node->exceptional > 0);
561 560
562 item_kill_tree(&tree); 561 item_kill_tree(&tree);
563 562
564 __radix_tree_insert(&tree, 0, old_order, (void *)0x12); 563 __radix_tree_insert(&tree, 0, old_order, xa_mk_value(5));
565 564
566 item = __radix_tree_lookup(&tree, 0, &node, NULL); 565 item = __radix_tree_lookup(&tree, 0, &node, NULL);
567 assert(item == (void *)0x12); 566 assert(item == xa_mk_value(5));
568 assert(node->exceptional > 0); 567 assert(node->exceptional > 0);
569 568
570 radix_tree_split(&tree, 0, new_order); 569 radix_tree_split(&tree, 0, new_order);
571 radix_tree_for_each_slot(slot, &tree, &iter, 0) { 570 radix_tree_for_each_slot(slot, &tree, &iter, 0) {
572 if (iter.index == (1 << new_order)) 571 if (iter.index == (1 << new_order))
573 radix_tree_iter_replace(&tree, &iter, slot, 572 radix_tree_iter_replace(&tree, &iter, slot,
574 (void *)0x16); 573 xa_mk_value(7));
575 else 574 else
576 radix_tree_iter_replace(&tree, &iter, slot, NULL); 575 radix_tree_iter_replace(&tree, &iter, slot, NULL);
577 } 576 }
578 577
579 item = __radix_tree_lookup(&tree, 1 << new_order, &node, NULL); 578 item = __radix_tree_lookup(&tree, 1 << new_order, &node, NULL);
580 assert(item == (void *)0x16); 579 assert(item == xa_mk_value(7));
581 assert(node->count == node->exceptional); 580 assert(node->count == node->exceptional);
582 do { 581 do {
583 node = node->parent; 582 node = node->parent;
@@ -610,13 +609,13 @@ static void multiorder_account(void)
610 609
611 item_insert_order(&tree, 0, 5); 610 item_insert_order(&tree, 0, 5);
612 611
613 __radix_tree_insert(&tree, 1 << 5, 5, (void *)0x12); 612 __radix_tree_insert(&tree, 1 << 5, 5, xa_mk_value(5));
614 __radix_tree_lookup(&tree, 0, &node, NULL); 613 __radix_tree_lookup(&tree, 0, &node, NULL);
615 assert(node->count == node->exceptional * 2); 614 assert(node->count == node->exceptional * 2);
616 radix_tree_delete(&tree, 1 << 5); 615 radix_tree_delete(&tree, 1 << 5);
617 assert(node->exceptional == 0); 616 assert(node->exceptional == 0);
618 617
619 __radix_tree_insert(&tree, 1 << 5, 5, (void *)0x12); 618 __radix_tree_insert(&tree, 1 << 5, 5, xa_mk_value(5));
620 __radix_tree_lookup(&tree, 1 << 5, &node, &slot); 619 __radix_tree_lookup(&tree, 1 << 5, &node, &slot);
621 assert(node->count == node->exceptional * 2); 620 assert(node->count == node->exceptional * 2);
622 __radix_tree_replace(&tree, node, slot, NULL, NULL); 621 __radix_tree_replace(&tree, node, slot, NULL, NULL);
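
The multiorder tests replace their old magic constants with xa_mk_value() so the stored entries really are value entries for the node->exceptional accounting, which also means every later lookup has to compare against the same xa_mk_value() form rather than the old raw pointer value. A quick standalone check of that point, again with simplified stand-ins; 5 and 0x12 are simply the constants the tests happen to use:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdio.h>

    static void *xa_mk_value(unsigned long v) { return (void *)((v << 1) | 1); }
    static bool xa_is_value(const void *e) { return (unsigned long)e & 1; }

    int main(void)
    {
            void *old_style = (void *)0x12UL; /* magic constant the tests used to store */
            void *new_style = xa_mk_value(5); /* what they store now */

            /* The old constant is not a value entry under the new encoding... */
            assert(!xa_is_value(old_style));
            /* ...so the assertions compare against the xa_mk_value() form. */
            assert(xa_is_value(new_style));
            assert(new_style == xa_mk_value(5));   /* deterministic, so == works */

            printf("xa_mk_value(5) encodes to %p\n", new_style);
            return 0;
    }
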
diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c
index def6015570b2..62de66c314b7 100644
--- a/tools/testing/radix-tree/test.c
+++ b/tools/testing/radix-tree/test.c
@@ -295,7 +295,7 @@ void item_kill_tree(struct radix_tree_root *root)
295 int nfound; 295 int nfound;
296 296
297 radix_tree_for_each_slot(slot, root, &iter, 0) { 297 radix_tree_for_each_slot(slot, root, &iter, 0) {
298 if (radix_tree_exceptional_entry(*slot)) 298 if (xa_is_value(*slot))
299 radix_tree_delete(root, iter.index); 299 radix_tree_delete(root, iter.index);
300 } 300 }
301 301