Diffstat (limited to 'fs/dax.c')
-rw-r--r--	fs/dax.c	112
1 file changed, 55 insertions(+), 57 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
index b68ce484e1be..ebcec36335eb 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -59,56 +59,57 @@ static int __init init_dax_wait_table(void)
 fs_initcall(init_dax_wait_table);
 
 /*
- * We use lowest available bit in exceptional entry for locking, one bit for
- * the entry size (PMD) and two more to tell us if the entry is a zero page or
- * an empty entry that is just used for locking. In total four special bits.
+ * DAX pagecache entries use XArray value entries so they can't be mistaken
+ * for pages. We use one bit for locking, one bit for the entry size (PMD)
+ * and two more to tell us if the entry is a zero page or an empty entry that
+ * is just used for locking. In total four special bits.
  *
  * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the ZERO_PAGE
  * and EMPTY bits aren't set the entry is a normal DAX entry with a filesystem
  * block allocation.
  */
-#define RADIX_DAX_SHIFT		(RADIX_TREE_EXCEPTIONAL_SHIFT + 4)
-#define RADIX_DAX_ENTRY_LOCK	(1 << RADIX_TREE_EXCEPTIONAL_SHIFT)
-#define RADIX_DAX_PMD		(1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1))
-#define RADIX_DAX_ZERO_PAGE	(1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2))
-#define RADIX_DAX_EMPTY		(1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3))
+#define DAX_SHIFT	(4)
+#define DAX_LOCKED	(1UL << 0)
+#define DAX_PMD		(1UL << 1)
+#define DAX_ZERO_PAGE	(1UL << 2)
+#define DAX_EMPTY	(1UL << 3)
 
 static unsigned long dax_radix_pfn(void *entry)
 {
-	return (unsigned long)entry >> RADIX_DAX_SHIFT;
+	return xa_to_value(entry) >> DAX_SHIFT;
 }
 
 static void *dax_radix_locked_entry(unsigned long pfn, unsigned long flags)
 {
-	return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags |
-			(pfn << RADIX_DAX_SHIFT) | RADIX_DAX_ENTRY_LOCK);
+	return xa_mk_value(flags | ((unsigned long)pfn << DAX_SHIFT) |
+			DAX_LOCKED);
 }
 
 static unsigned int dax_radix_order(void *entry)
 {
-	if ((unsigned long)entry & RADIX_DAX_PMD)
+	if (xa_to_value(entry) & DAX_PMD)
 		return PMD_SHIFT - PAGE_SHIFT;
 	return 0;
 }
 
 static int dax_is_pmd_entry(void *entry)
 {
-	return (unsigned long)entry & RADIX_DAX_PMD;
+	return xa_to_value(entry) & DAX_PMD;
 }
 
 static int dax_is_pte_entry(void *entry)
 {
-	return !((unsigned long)entry & RADIX_DAX_PMD);
+	return !(xa_to_value(entry) & DAX_PMD);
 }
 
 static int dax_is_zero_entry(void *entry)
 {
-	return (unsigned long)entry & RADIX_DAX_ZERO_PAGE;
+	return xa_to_value(entry) & DAX_ZERO_PAGE;
 }
 
 static int dax_is_empty_entry(void *entry)
 {
-	return (unsigned long)entry & RADIX_DAX_EMPTY;
+	return xa_to_value(entry) & DAX_EMPTY;
 }
 
 /*
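The hunk above swaps the open-coded RADIX_TREE_EXCEPTIONAL_* arithmetic for the XArray value-entry helpers. The following standalone C sketch only models that encoding for illustration: xa_mk_value()/xa_to_value()/xa_is_value() are re-implemented here in userspace (the real helpers live in include/linux/xarray.h), and the pfn and flag values are arbitrary.

/*
 * Userspace model of the new DAX entry layout: bit 0 tags the word as a
 * value entry (so it can never be confused with a naturally aligned
 * struct page pointer), the four DAX flag bits sit at the bottom of the
 * value and the pfn is stored above DAX_SHIFT.
 */
#include <assert.h>
#include <stdio.h>

#define DAX_SHIFT	(4)
#define DAX_LOCKED	(1UL << 0)
#define DAX_PMD		(1UL << 1)
#define DAX_ZERO_PAGE	(1UL << 2)
#define DAX_EMPTY	(1UL << 3)

static void *xa_mk_value(unsigned long v)	/* model of the kernel helper */
{
	return (void *)((v << 1) | 1);
}

static unsigned long xa_to_value(const void *entry)
{
	return (unsigned long)entry >> 1;
}

static int xa_is_value(const void *entry)
{
	return (unsigned long)entry & 1;
}

int main(void)
{
	unsigned long pfn = 0x12345;	/* arbitrary example pfn */
	void *entry = xa_mk_value((pfn << DAX_SHIFT) | DAX_PMD | DAX_LOCKED);

	assert(xa_is_value(entry));				/* not a page pointer */
	assert((xa_to_value(entry) >> DAX_SHIFT) == pfn);	/* pfn round-trips */
	assert(xa_to_value(entry) & DAX_PMD);			/* flags round-trip */
	printf("entry %p -> pfn %#lx\n", entry, xa_to_value(entry) >> DAX_SHIFT);
	return 0;
}

This is also why dax_radix_pfn() and friends now go through xa_to_value() instead of casting the pointer: the helpers hide where the payload actually sits inside the word.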
@@ -186,9 +187,9 @@ static void dax_wake_mapping_entry_waiter(struct address_space *mapping,
  */
 static inline int slot_locked(struct address_space *mapping, void **slot)
 {
-	unsigned long entry = (unsigned long)
-		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
-	return entry & RADIX_DAX_ENTRY_LOCK;
+	unsigned long entry = xa_to_value(
+		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock));
+	return entry & DAX_LOCKED;
 }
 
 /*
@@ -196,12 +197,11 @@ static inline int slot_locked(struct address_space *mapping, void **slot)
  */
 static inline void *lock_slot(struct address_space *mapping, void **slot)
 {
-	unsigned long entry = (unsigned long)
-		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
-
-	entry |= RADIX_DAX_ENTRY_LOCK;
-	radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry);
-	return (void *)entry;
+	unsigned long v = xa_to_value(
+		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock));
+	void *entry = xa_mk_value(v | DAX_LOCKED);
+	radix_tree_replace_slot(&mapping->i_pages, slot, entry);
+	return entry;
 }
 
 /*
@@ -209,17 +209,16 @@ static inline void *lock_slot(struct address_space *mapping, void **slot)
  */
 static inline void *unlock_slot(struct address_space *mapping, void **slot)
 {
-	unsigned long entry = (unsigned long)
-		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
-
-	entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK;
-	radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry);
-	return (void *)entry;
+	unsigned long v = xa_to_value(
+		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock));
+	void *entry = xa_mk_value(v & ~DAX_LOCKED);
+	radix_tree_replace_slot(&mapping->i_pages, slot, entry);
+	return entry;
 }
 
 /*
  * Lookup entry in radix tree, wait for it to become unlocked if it is
- * exceptional entry and return it. The caller must call
+ * a DAX entry and return it. The caller must call
  * put_unlocked_mapping_entry() when he decided not to lock the entry or
  * put_locked_mapping_entry() when he locked the entry and now wants to
  * unlock it.
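With value entries the lock bit can no longer be OR-ed straight into the stored pointer, so lock_slot()/unlock_slot() above decode the value, flip DAX_LOCKED and re-encode with xa_mk_value(). A minimal self-contained sketch of that round trip (the two helpers are again userspace stand-ins, not the kernel ones, and the entry value is made up):

#include <assert.h>

#define DAX_LOCKED	(1UL << 0)

static void *xa_mk_value(unsigned long v)       { return (void *)((v << 1) | 1); }
static unsigned long xa_to_value(const void *e) { return (unsigned long)e >> 1; }

int main(void)
{
	void *entry = xa_mk_value(0xabc0);	/* some unlocked DAX value */
	void *locked = xa_mk_value(xa_to_value(entry) | DAX_LOCKED);
	void *unlocked = xa_mk_value(xa_to_value(locked) & ~DAX_LOCKED);

	assert(xa_to_value(locked) & DAX_LOCKED);	/* lock bit is set */
	assert(unlocked == entry);	/* unlock restores the original entry */
	return 0;
}

In the kernel code, radix_tree_replace_slot() then stores the freshly built entry back into the slot; the entry value itself is never modified in place.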
@@ -242,7 +241,7 @@ static void *__get_unlocked_mapping_entry(struct address_space *mapping,
 		entry = __radix_tree_lookup(&mapping->i_pages, index, NULL,
 					  &slot);
 		if (!entry ||
-		    WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)) ||
+		    WARN_ON_ONCE(!xa_is_value(entry)) ||
 		    !slot_locked(mapping, slot)) {
 			if (slotp)
 				*slotp = slot;
@@ -283,7 +282,7 @@ static void unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
 
 	xa_lock_irq(&mapping->i_pages);
 	entry = __radix_tree_lookup(&mapping->i_pages, index, NULL, &slot);
-	if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry) ||
+	if (WARN_ON_ONCE(!entry || !xa_is_value(entry) ||
 			 !slot_locked(mapping, slot))) {
 		xa_unlock_irq(&mapping->i_pages);
 		return;
@@ -472,12 +471,11 @@ void dax_unlock_mapping_entry(struct page *page)
 }
 
 /*
- * Find radix tree entry at given index. If it points to an exceptional entry,
- * return it with the radix tree entry locked. If the radix tree doesn't
- * contain given index, create an empty exceptional entry for the index and
- * return with it locked.
+ * Find radix tree entry at given index. If it is a DAX entry, return it
+ * with the radix tree entry locked. If the radix tree doesn't contain the
+ * given index, create an empty entry for the index and return with it locked.
  *
- * When requesting an entry with size RADIX_DAX_PMD, grab_mapping_entry() will
+ * When requesting an entry with size DAX_PMD, grab_mapping_entry() will
  * either return that locked entry or will return an error. This error will
  * happen if there are any 4k entries within the 2MiB range that we are
  * requesting.
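The comment above is where the PMD flag gains its meaning: a DAX_PMD entry covers a whole huge-page-sized range, which is also what dax_radix_order() reports to the radix tree. A small sketch of that relationship, with PMD_SHIFT/PAGE_SHIFT hard-coded to the common x86-64 values purely as an assumption for illustration, and the encoding helpers again modelled in userspace:

#include <assert.h>

#define DAX_PMD		(1UL << 1)
#define PAGE_SHIFT	12	/* assumption: 4KiB pages (x86-64) */
#define PMD_SHIFT	21	/* assumption: 2MiB PMD mappings (x86-64) */

static void *xa_mk_value(unsigned long v)       { return (void *)((v << 1) | 1); }
static unsigned long xa_to_value(const void *e) { return (unsigned long)e >> 1; }

/* mirrors dax_radix_order() from the hunk near the top of the file */
static unsigned int entry_order(void *entry)
{
	if (xa_to_value(entry) & DAX_PMD)
		return PMD_SHIFT - PAGE_SHIFT;
	return 0;
}

int main(void)
{
	assert(entry_order(xa_mk_value(DAX_PMD)) == 9);	/* 2^9 = 512 pages = 2MiB */
	assert(entry_order(xa_mk_value(0)) == 0);	/* single 4KiB page */
	return 0;
}

So when grab_mapping_entry() is asked for a DAX_PMD entry but PTE-sized entries already populate that 512-page range, it returns an error rather than silently downgrading, which is the fallback the comment describes.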
@@ -507,13 +505,13 @@ restart:
 	xa_lock_irq(&mapping->i_pages);
 	entry = get_unlocked_mapping_entry(mapping, index, &slot);
 
-	if (WARN_ON_ONCE(entry && !radix_tree_exceptional_entry(entry))) {
+	if (WARN_ON_ONCE(entry && !xa_is_value(entry))) {
 		entry = ERR_PTR(-EIO);
 		goto out_unlock;
 	}
 
 	if (entry) {
-		if (size_flag & RADIX_DAX_PMD) {
+		if (size_flag & DAX_PMD) {
 			if (dax_is_pte_entry(entry)) {
 				put_unlocked_mapping_entry(mapping, index,
 						entry);
@@ -584,7 +582,7 @@ restart:
 						true);
 	}
 
-	entry = dax_radix_locked_entry(0, size_flag | RADIX_DAX_EMPTY);
+	entry = dax_radix_locked_entry(0, size_flag | DAX_EMPTY);
 
 	err = __radix_tree_insert(&mapping->i_pages, index,
 			dax_radix_order(entry), entry);
@@ -673,8 +671,7 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
 		if (index >= end)
 			break;
 
-		if (WARN_ON_ONCE(
-				!radix_tree_exceptional_entry(pvec_ent)))
+		if (WARN_ON_ONCE(!xa_is_value(pvec_ent)))
 			continue;
 
 		xa_lock_irq(&mapping->i_pages);
@@ -713,7 +710,7 @@ static int __dax_invalidate_mapping_entry(struct address_space *mapping,
 
 	xa_lock_irq(pages);
 	entry = get_unlocked_mapping_entry(mapping, index, NULL);
-	if (!entry || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)))
+	if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
 		goto out;
 	if (!trunc &&
 	    (radix_tree_tag_get(pages, index, PAGECACHE_TAG_DIRTY) ||
@@ -729,8 +726,8 @@ out:
 	return ret;
 }
 /*
- * Delete exceptional DAX entry at @index from @mapping. Wait for radix tree
- * entry to get unlocked before deleting it.
+ * Delete DAX entry at @index from @mapping. Wait for it
+ * to be unlocked before deleting it.
  */
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 {
@@ -740,7 +737,7 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 	 * This gets called from truncate / punch_hole path. As such, the caller
 	 * must hold locks protecting against concurrent modifications of the
 	 * radix tree (usually fs-private i_mmap_sem for writing). Since the
-	 * caller has seen exceptional entry for this index, we better find it
+	 * caller has seen a DAX entry for this index, we better find it
 	 * at that index as well...
 	 */
 	WARN_ON_ONCE(!ret);
@@ -748,7 +745,7 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 }
 
 /*
- * Invalidate exceptional DAX entry if it is clean.
+ * Invalidate DAX entry if it is clean.
  */
 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
 				      pgoff_t index)
@@ -802,7 +799,7 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
 	if (dirty)
 		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
 
-	if (dax_is_zero_entry(entry) && !(flags & RADIX_DAX_ZERO_PAGE)) {
+	if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) {
 		/* we are replacing a zero page with block mapping */
 		if (dax_is_pmd_entry(entry))
 			unmap_mapping_pages(mapping, index & ~PG_PMD_COLOUR,
@@ -940,13 +937,13 @@ static int dax_writeback_one(struct dax_device *dax_dev,
 	 * A page got tagged dirty in DAX mapping? Something is seriously
 	 * wrong.
 	 */
-	if (WARN_ON(!radix_tree_exceptional_entry(entry)))
+	if (WARN_ON(!xa_is_value(entry)))
 		return -EIO;
 
 	xa_lock_irq(pages);
 	entry2 = get_unlocked_mapping_entry(mapping, index, &slot);
 	/* Entry got punched out / reallocated? */
-	if (!entry2 || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry2)))
+	if (!entry2 || WARN_ON_ONCE(!xa_is_value(entry2)))
 		goto put_unlocked;
 	/*
 	 * Entry got reallocated elsewhere? No need to writeback. We have to
@@ -1123,8 +1120,9 @@ static vm_fault_t dax_load_hole(struct address_space *mapping, void *entry,
 	pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr));
 	vm_fault_t ret;
 
-	dax_insert_mapping_entry(mapping, vmf, entry, pfn, RADIX_DAX_ZERO_PAGE,
-			false);
+	dax_insert_mapping_entry(mapping, vmf, entry, pfn,
+			DAX_ZERO_PAGE, false);
+
 	ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
 	trace_dax_load_hole(inode, vmf, ret);
 	return ret;
@@ -1514,7 +1512,7 @@ static vm_fault_t dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
 
 	pfn = page_to_pfn_t(zero_page);
 	ret = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
-			RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false);
+			DAX_PMD | DAX_ZERO_PAGE, false);
 
 	ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
 	if (!pmd_none(*(vmf->pmd))) {
@@ -1597,7 +1595,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 	 * is already in the tree, for instance), it will return -EEXIST and
 	 * we just fall back to 4k entries.
 	 */
-	entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
+	entry = grab_mapping_entry(mapping, pgoff, DAX_PMD);
 	if (IS_ERR(entry))
 		goto fallback;
 
@@ -1635,7 +1633,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 		goto finish_iomap;
 
 	entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
-					RADIX_DAX_PMD, write && !sync);
+					DAX_PMD, write && !sync);
 
 	/*
 	 * If we are doing synchronous page fault and inode needs fsync,