Diffstat (limited to 'fs/dax.c')
-rw-r--r-- | fs/dax.c | 112
1 file changed, 55 insertions(+), 57 deletions(-)
@@ -59,56 +59,57 @@ static int __init init_dax_wait_table(void)
 fs_initcall(init_dax_wait_table);
 
 /*
- * We use lowest available bit in exceptional entry for locking, one bit for
- * the entry size (PMD) and two more to tell us if the entry is a zero page or
- * an empty entry that is just used for locking. In total four special bits.
+ * DAX pagecache entries use XArray value entries so they can't be mistaken
+ * for pages. We use one bit for locking, one bit for the entry size (PMD)
+ * and two more to tell us if the entry is a zero page or an empty entry that
+ * is just used for locking. In total four special bits.
  *
  * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the ZERO_PAGE
  * and EMPTY bits aren't set the entry is a normal DAX entry with a filesystem
  * block allocation.
  */
-#define RADIX_DAX_SHIFT		(RADIX_TREE_EXCEPTIONAL_SHIFT + 4)
-#define RADIX_DAX_ENTRY_LOCK	(1 << RADIX_TREE_EXCEPTIONAL_SHIFT)
-#define RADIX_DAX_PMD		(1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1))
-#define RADIX_DAX_ZERO_PAGE	(1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2))
-#define RADIX_DAX_EMPTY		(1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3))
+#define DAX_SHIFT	(4)
+#define DAX_LOCKED	(1UL << 0)
+#define DAX_PMD		(1UL << 1)
+#define DAX_ZERO_PAGE	(1UL << 2)
+#define DAX_EMPTY	(1UL << 3)
 
 static unsigned long dax_radix_pfn(void *entry)
 {
-	return (unsigned long)entry >> RADIX_DAX_SHIFT;
+	return xa_to_value(entry) >> DAX_SHIFT;
 }
 
 static void *dax_radix_locked_entry(unsigned long pfn, unsigned long flags)
 {
-	return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags |
-			(pfn << RADIX_DAX_SHIFT) | RADIX_DAX_ENTRY_LOCK);
+	return xa_mk_value(flags | ((unsigned long)pfn << DAX_SHIFT) |
+			DAX_LOCKED);
 }
 
 static unsigned int dax_radix_order(void *entry)
 {
-	if ((unsigned long)entry & RADIX_DAX_PMD)
+	if (xa_to_value(entry) & DAX_PMD)
 		return PMD_SHIFT - PAGE_SHIFT;
 	return 0;
 }
 
 static int dax_is_pmd_entry(void *entry)
 {
-	return (unsigned long)entry & RADIX_DAX_PMD;
+	return xa_to_value(entry) & DAX_PMD;
 }
 
 static int dax_is_pte_entry(void *entry)
 {
-	return !((unsigned long)entry & RADIX_DAX_PMD);
+	return !(xa_to_value(entry) & DAX_PMD);
 }
 
 static int dax_is_zero_entry(void *entry)
 {
-	return (unsigned long)entry & RADIX_DAX_ZERO_PAGE;
+	return xa_to_value(entry) & DAX_ZERO_PAGE;
 }
 
 static int dax_is_empty_entry(void *entry)
 {
-	return (unsigned long)entry & RADIX_DAX_EMPTY;
+	return xa_to_value(entry) & DAX_EMPTY;
 }
 
 /*
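
The hunk above is the core of the conversion: the pfn and the four flag bits are now packed into an XArray value entry rather than a radix tree exceptional entry. The following standalone sketch (userspace C, not kernel code) illustrates the round trip; the toy xa_mk_value()/xa_to_value() assume the kernel's convention of storing a value v as the tagged pointer (v << 1) | 1, and dax_locked_entry() is a hypothetical stand-in for dax_radix_locked_entry().

/*
 * Standalone userspace sketch of the new entry encoding, for illustration
 * only. The toy xa_mk_value()/xa_to_value() mirror the kernel's convention
 * of storing a value entry as (value << 1) | 1 so it can never alias a
 * kernel pointer; the DAX_* flags and DAX_SHIFT match the hunk above.
 */
#include <assert.h>
#include <stdio.h>

#define DAX_SHIFT	(4)
#define DAX_LOCKED	(1UL << 0)
#define DAX_PMD		(1UL << 1)
#define DAX_ZERO_PAGE	(1UL << 2)
#define DAX_EMPTY	(1UL << 3)

static void *xa_mk_value(unsigned long v)       { return (void *)((v << 1) | 1); }
static unsigned long xa_to_value(const void *e) { return (unsigned long)e >> 1; }

/* Hypothetical stand-in for dax_radix_locked_entry(). */
static void *dax_locked_entry(unsigned long pfn, unsigned long flags)
{
	return xa_mk_value(flags | (pfn << DAX_SHIFT) | DAX_LOCKED);
}

int main(void)
{
	void *entry = dax_locked_entry(0x1234, DAX_PMD);

	/* The pfn and the flag bits survive the round trip. */
	assert(xa_to_value(entry) >> DAX_SHIFT == 0x1234);
	assert(xa_to_value(entry) & DAX_PMD);
	assert(xa_to_value(entry) & DAX_LOCKED);
	printf("raw entry %p, pfn %#lx\n", entry,
	       xa_to_value(entry) >> DAX_SHIFT);
	return 0;
}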
@@ -186,9 +187,9 @@ static void dax_wake_mapping_entry_waiter(struct address_space *mapping,
  */
 static inline int slot_locked(struct address_space *mapping, void **slot)
 {
-	unsigned long entry = (unsigned long)
-		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
-	return entry & RADIX_DAX_ENTRY_LOCK;
+	unsigned long entry = xa_to_value(
+		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock));
+	return entry & DAX_LOCKED;
 }
 
 /*
@@ -196,12 +197,11 @@ static inline int slot_locked(struct address_space *mapping, void **slot)
  */
 static inline void *lock_slot(struct address_space *mapping, void **slot)
 {
-	unsigned long entry = (unsigned long)
-		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
-
-	entry |= RADIX_DAX_ENTRY_LOCK;
-	radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry);
-	return (void *)entry;
+	unsigned long v = xa_to_value(
+		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock));
+	void *entry = xa_mk_value(v | DAX_LOCKED);
+	radix_tree_replace_slot(&mapping->i_pages, slot, entry);
+	return entry;
 }
 
 /*
@@ -209,17 +209,16 @@ static inline void *lock_slot(struct address_space *mapping, void **slot)
  */
 static inline void *unlock_slot(struct address_space *mapping, void **slot)
 {
-	unsigned long entry = (unsigned long)
-		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
-
-	entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK;
-	radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry);
-	return (void *)entry;
+	unsigned long v = xa_to_value(
+		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock));
+	void *entry = xa_mk_value(v & ~DAX_LOCKED);
+	radix_tree_replace_slot(&mapping->i_pages, slot, entry);
+	return entry;
 }
 
 /*
  * Lookup entry in radix tree, wait for it to become unlocked if it is
- * exceptional entry and return it. The caller must call
+ * a DAX entry and return it. The caller must call
  * put_unlocked_mapping_entry() when he decided not to lock the entry or
  * put_locked_mapping_entry() when he locked the entry and now wants to
  * unlock it.
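
With raw exceptional entries the lock bit could be OR'ed straight into the stored pointer; lock_slot()/unlock_slot() now have to unpack the value, flip DAX_LOCKED and re-pack it with xa_mk_value(). A compact userspace sketch of that pattern follows, under the same (v << 1) | 1 assumption as the earlier example; lock_entry()/unlock_entry() are hypothetical helper names, not functions from fs/dax.c.

/*
 * Userspace sketch of the lock/unlock pattern on value entries: the flag
 * cannot be set on the raw pointer any more, so the value is unpacked,
 * modified and re-packed.
 */
#include <assert.h>

#define DAX_LOCKED	(1UL << 0)

static void *xa_mk_value(unsigned long v)       { return (void *)((v << 1) | 1); }
static unsigned long xa_to_value(const void *e) { return (unsigned long)e >> 1; }

static void *lock_entry(void *entry)
{
	return xa_mk_value(xa_to_value(entry) | DAX_LOCKED);
}

static void *unlock_entry(void *entry)
{
	return xa_mk_value(xa_to_value(entry) & ~DAX_LOCKED);
}

int main(void)
{
	void *e = xa_mk_value(0xabcd0);		/* some unlocked entry */

	e = lock_entry(e);
	assert(xa_to_value(e) & DAX_LOCKED);
	e = unlock_entry(e);
	assert(!(xa_to_value(e) & DAX_LOCKED));
	return 0;
}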
@@ -242,7 +241,7 @@ static void *__get_unlocked_mapping_entry(struct address_space *mapping,
 		entry = __radix_tree_lookup(&mapping->i_pages, index, NULL,
 					  &slot);
 		if (!entry ||
-		    WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)) ||
+		    WARN_ON_ONCE(!xa_is_value(entry)) ||
 		    !slot_locked(mapping, slot)) {
 			if (slotp)
 				*slotp = slot;
@@ -283,7 +282,7 @@ static void unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
 
 	xa_lock_irq(&mapping->i_pages);
 	entry = __radix_tree_lookup(&mapping->i_pages, index, NULL, &slot);
-	if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry) ||
+	if (WARN_ON_ONCE(!entry || !xa_is_value(entry) ||
 			 !slot_locked(mapping, slot))) {
 		xa_unlock_irq(&mapping->i_pages);
 		return;
@@ -472,12 +471,11 @@ void dax_unlock_mapping_entry(struct page *page)
 }
 
 /*
- * Find radix tree entry at given index. If it points to an exceptional entry,
- * return it with the radix tree entry locked. If the radix tree doesn't
- * contain given index, create an empty exceptional entry for the index and
- * return with it locked.
+ * Find radix tree entry at given index. If it is a DAX entry, return it
+ * with the radix tree entry locked. If the radix tree doesn't contain the
+ * given index, create an empty entry for the index and return with it locked.
  *
- * When requesting an entry with size RADIX_DAX_PMD, grab_mapping_entry() will
+ * When requesting an entry with size DAX_PMD, grab_mapping_entry() will
  * either return that locked entry or will return an error. This error will
  * happen if there are any 4k entries within the 2MiB range that we are
  * requesting.
@@ -507,13 +505,13 @@ restart:
 	xa_lock_irq(&mapping->i_pages);
 	entry = get_unlocked_mapping_entry(mapping, index, &slot);
 
-	if (WARN_ON_ONCE(entry && !radix_tree_exceptional_entry(entry))) {
+	if (WARN_ON_ONCE(entry && !xa_is_value(entry))) {
 		entry = ERR_PTR(-EIO);
 		goto out_unlock;
 	}
 
 	if (entry) {
-		if (size_flag & RADIX_DAX_PMD) {
+		if (size_flag & DAX_PMD) {
 			if (dax_is_pte_entry(entry)) {
 				put_unlocked_mapping_entry(mapping, index,
 						entry);
@@ -584,7 +582,7 @@ restart:
 					true);
 	}
 
-	entry = dax_radix_locked_entry(0, size_flag | RADIX_DAX_EMPTY);
+	entry = dax_radix_locked_entry(0, size_flag | DAX_EMPTY);
 
 	err = __radix_tree_insert(&mapping->i_pages, index,
 			dax_radix_order(entry), entry);
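
The empty entry created just above carries no pfn, only flag bits, and a PMD-sized entry is inserted into the tree at a multi-page order via dax_radix_order(). A small worked example of the numbers involved (plain userspace arithmetic; the PMD_SHIFT and PAGE_SHIFT values assume x86-64 with 4KiB pages and 2MiB PMDs):

/*
 * Worked example of a locked, empty PMD entry. It carries no pfn, only
 * flag bits, and occupies a whole PMD's worth of index space in the tree.
 */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PMD_SHIFT	21

#define DAX_SHIFT	(4)
#define DAX_LOCKED	(1UL << 0)
#define DAX_PMD		(1UL << 1)
#define DAX_EMPTY	(1UL << 3)

int main(void)
{
	unsigned long value = (0UL << DAX_SHIFT) | DAX_PMD | DAX_EMPTY | DAX_LOCKED;
	unsigned int order = PMD_SHIFT - PAGE_SHIFT;	/* what dax_radix_order() returns */

	/* Prints: value = 0xb, order = 9 (512 slots). */
	printf("value = %#lx, order = %u (%lu slots)\n",
	       value, order, 1UL << order);
	return 0;
}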
@@ -673,8 +671,7 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
 		if (index >= end)
 			break;
 
-		if (WARN_ON_ONCE(
-				!radix_tree_exceptional_entry(pvec_ent)))
+		if (WARN_ON_ONCE(!xa_is_value(pvec_ent)))
 			continue;
 
 		xa_lock_irq(&mapping->i_pages);
@@ -713,7 +710,7 @@ static int __dax_invalidate_mapping_entry(struct address_space *mapping,
 
 	xa_lock_irq(pages);
 	entry = get_unlocked_mapping_entry(mapping, index, NULL);
-	if (!entry || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)))
+	if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
 		goto out;
 	if (!trunc &&
 	    (radix_tree_tag_get(pages, index, PAGECACHE_TAG_DIRTY) ||
@@ -729,8 +726,8 @@ out:
 	return ret;
 }
 /*
- * Delete exceptional DAX entry at @index from @mapping. Wait for radix tree
- * entry to get unlocked before deleting it.
+ * Delete DAX entry at @index from @mapping. Wait for it
+ * to be unlocked before deleting it.
  */
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 {
@@ -740,7 +737,7 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 	 * This gets called from truncate / punch_hole path. As such, the caller
 	 * must hold locks protecting against concurrent modifications of the
 	 * radix tree (usually fs-private i_mmap_sem for writing). Since the
-	 * caller has seen exceptional entry for this index, we better find it
+	 * caller has seen a DAX entry for this index, we better find it
	 * at that index as well...
	 */
	WARN_ON_ONCE(!ret);
@@ -748,7 +745,7 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 }
 
 /*
- * Invalidate exceptional DAX entry if it is clean.
+ * Invalidate DAX entry if it is clean.
  */
 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
 				      pgoff_t index)
@@ -802,7 +799,7 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
 	if (dirty)
 		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
 
-	if (dax_is_zero_entry(entry) && !(flags & RADIX_DAX_ZERO_PAGE)) {
+	if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) {
 		/* we are replacing a zero page with block mapping */
 		if (dax_is_pmd_entry(entry))
 			unmap_mapping_pages(mapping, index & ~PG_PMD_COLOUR,
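
When a PMD-sized zero-page entry is replaced by a real block mapping, the mappings for the whole 2MiB range are torn down, which is why the index is rounded down with index & ~PG_PMD_COLOUR. A tiny sketch of that alignment arithmetic (userspace only; the PG_PMD_COLOUR definition below is an assumption modelled on fs/dax.c's notion of the low bits of a page offset within a PMD, using x86-64 sizes):

/*
 * Sketch of the index alignment used above. PG_PMD_COLOUR is assumed to be
 * the number of 4KiB pages per 2MiB PMD minus one, i.e. the "colour" bits
 * of a page offset within a PMD.
 */
#include <assert.h>

#define PAGE_SHIFT	12
#define PMD_SIZE	(1UL << 21)
#define PG_PMD_COLOUR	((PMD_SIZE >> PAGE_SHIFT) - 1)	/* 511 */

int main(void)
{
	unsigned long index = 0x12345;			/* arbitrary page offset */
	unsigned long start = index & ~PG_PMD_COLOUR;	/* first index of that PMD */

	assert(start == 0x12200);
	assert(start % (PG_PMD_COLOUR + 1) == 0);	/* aligned to 512 pages */
	return 0;
}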
@@ -940,13 +937,13 @@ static int dax_writeback_one(struct dax_device *dax_dev,
 	 * A page got tagged dirty in DAX mapping? Something is seriously
 	 * wrong.
 	 */
-	if (WARN_ON(!radix_tree_exceptional_entry(entry)))
+	if (WARN_ON(!xa_is_value(entry)))
 		return -EIO;
 
 	xa_lock_irq(pages);
 	entry2 = get_unlocked_mapping_entry(mapping, index, &slot);
 	/* Entry got punched out / reallocated? */
-	if (!entry2 || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry2)))
+	if (!entry2 || WARN_ON_ONCE(!xa_is_value(entry2)))
 		goto put_unlocked;
 	/*
 	 * Entry got reallocated elsewhere? No need to writeback. We have to
@@ -1123,8 +1120,9 @@ static vm_fault_t dax_load_hole(struct address_space *mapping, void *entry,
 	pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr));
 	vm_fault_t ret;
 
-	dax_insert_mapping_entry(mapping, vmf, entry, pfn, RADIX_DAX_ZERO_PAGE,
-			false);
+	dax_insert_mapping_entry(mapping, vmf, entry, pfn,
+			DAX_ZERO_PAGE, false);
+
 	ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
 	trace_dax_load_hole(inode, vmf, ret);
 	return ret;
@@ -1514,7 +1512,7 @@ static vm_fault_t dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
 
 	pfn = page_to_pfn_t(zero_page);
 	ret = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
-			RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false);
+			DAX_PMD | DAX_ZERO_PAGE, false);
 
 	ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
 	if (!pmd_none(*(vmf->pmd))) {
@@ -1597,7 +1595,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 	 * is already in the tree, for instance), it will return -EEXIST and
 	 * we just fall back to 4k entries.
 	 */
-	entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
+	entry = grab_mapping_entry(mapping, pgoff, DAX_PMD);
 	if (IS_ERR(entry))
 		goto fallback;
 
@@ -1635,7 +1633,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 		goto finish_iomap;
 
 	entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
-				RADIX_DAX_PMD, write && !sync);
+				DAX_PMD, write && !sync);
 
 	/*
 	 * If we are doing synchronous page fault and inode needs fsync,