author		Linus Torvalds <torvalds@linux-foundation.org>	2019-07-18 13:58:52 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-07-18 13:58:52 -0400
commit		0fe49f70a08d7d25acee3b066a88c654fea26121 (patch)
tree		a6bcc3efdd88b7848d4262b08e8e62c83db2574a /fs/dax.c
parent		f8c3500cd137867927bc080f4a6e02e0222dd1b8 (diff)
parent		23c84eb7837514e16d79ed6d849b13745e0ce688 (diff)
Merge tag 'dax-for-5.3' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull dax updates from Dan Williams:
 "The fruits of a bug hunt in the fsdax implementation with Willy and a
  small feature update for device-dax:

   - Fix a hang condition that started triggering after the Xarray
     conversion of fsdax in the v4.20 kernel.

   - Add a 'resource' (root-only physical base address) sysfs attribute
     to device-dax instances to correlate memory-blocks onlined via the
     kmem driver with a given device instance"

* tag 'dax-for-5.3' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
  dax: Fix missed wakeup with PMD faults
  device-dax: Add a 'resource' attribute
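To illustrate the shape of the fsdax fix in the diff below: get_unlocked_entry() grows an order argument, and a lookup that finds an entry of smaller order than requested returns a conflict sentinel (tested with dax_is_conflict()) instead of waiting on it, so a PMD fault that races with a PTE entry gives up and falls back rather than sleeping on an entry whose wakeup it may never see (the hang the pull message refers to). The following stand-alone sketch only models that idea; it is not kernel code, and every name in it (struct entry, lookup_entry, CONFLICT_ENTRY, the order values) is illustrative.

/* Stand-alone model (not kernel code): an order-aware lookup reports a
 * conflict instead of waiting when the stored entry is smaller than the
 * order being faulted in, and the caller treats that as "fall back". */
#include <stdbool.h>
#include <stdio.h>

#define PTE_ORDER 0u
#define PMD_ORDER 9u			/* x86-64 value, for illustration */

struct entry {
	unsigned int order;		/* 0 for a PTE entry, 9 for a PMD entry */
};

static struct entry conflict_sentinel;	/* plays the role of XA_RETRY_ENTRY */
#define CONFLICT_ENTRY (&conflict_sentinel)

static bool is_conflict(struct entry *e)
{
	return e == CONFLICT_ENTRY;
}

/* Model of get_unlocked_entry(xas, order): never wait on a smaller entry. */
static struct entry *lookup_entry(struct entry *stored, unsigned int order)
{
	if (stored && stored->order < order)
		return CONFLICT_ENTRY;
	return stored;			/* may be NULL: nothing stored yet */
}

int main(void)
{
	struct entry pte = { .order = PTE_ORDER };
	struct entry *e;

	/* A PMD-order fault over an existing PTE entry must not block:
	 * it sees a conflict and falls back to handling PTE faults. */
	e = lookup_entry(&pte, PMD_ORDER);
	printf("PMD lookup over PTE entry: %s\n",
	       is_conflict(e) ? "conflict -> fall back" : "use entry");

	/* A PTE-order fault over the same entry proceeds normally. */
	e = lookup_entry(&pte, PTE_ORDER);
	printf("PTE lookup over PTE entry: %s\n",
	       is_conflict(e) ? "conflict" : "use entry");
	return 0;
}

In the patch itself the sentinel is simply XA_RETRY_ENTRY, a value that get_unlocked_entry() otherwise never hands back to these callers, which presumably lets it double as the conflict marker without a new special entry type.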
Diffstat (limited to 'fs/dax.c')
-rw-r--r--	fs/dax.c	53
1 file changed, 33 insertions(+), 20 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
index fe5e33810cd4..e99e5f373c88 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -124,6 +124,15 @@ static int dax_is_empty_entry(void *entry)
 }
 
 /*
+ * true if the entry that was found is of a smaller order than the entry
+ * we were looking for
+ */
+static bool dax_is_conflict(void *entry)
+{
+	return entry == XA_RETRY_ENTRY;
+}
+
+/*
  * DAX page cache entry locking
  */
 struct exceptional_entry_key {
@@ -195,11 +204,13 @@ static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all)
  * Look up entry in page cache, wait for it to become unlocked if it
  * is a DAX entry and return it. The caller must subsequently call
  * put_unlocked_entry() if it did not lock the entry or dax_unlock_entry()
- * if it did.
+ * if it did. The entry returned may have a larger order than @order.
+ * If @order is larger than the order of the entry found in i_pages, this
+ * function returns a dax_is_conflict entry.
  *
  * Must be called with the i_pages lock held.
  */
-static void *get_unlocked_entry(struct xa_state *xas)
+static void *get_unlocked_entry(struct xa_state *xas, unsigned int order)
 {
 	void *entry;
 	struct wait_exceptional_entry_queue ewait;
@@ -210,6 +221,8 @@ static void *get_unlocked_entry(struct xa_state *xas)
 
 	for (;;) {
 		entry = xas_find_conflict(xas);
+		if (dax_entry_order(entry) < order)
+			return XA_RETRY_ENTRY;
 		if (!entry || WARN_ON_ONCE(!xa_is_value(entry)) ||
 				!dax_is_locked(entry))
 			return entry;
@@ -254,7 +267,7 @@ static void wait_entry_unlocked(struct xa_state *xas, void *entry)
 static void put_unlocked_entry(struct xa_state *xas, void *entry)
 {
 	/* If we were the only waiter woken, wake the next one */
-	if (entry)
+	if (entry && dax_is_conflict(entry))
 		dax_wake_entry(xas, entry, false);
 }
 
@@ -461,7 +474,7 @@ void dax_unlock_page(struct page *page, dax_entry_t cookie)
  * overlap with xarray value entries.
  */
 static void *grab_mapping_entry(struct xa_state *xas,
-		struct address_space *mapping, unsigned long size_flag)
+		struct address_space *mapping, unsigned int order)
 {
 	unsigned long index = xas->xa_index;
 	bool pmd_downgrade = false; /* splitting PMD entry into PTE entries? */
@@ -469,20 +482,17 @@ static void *grab_mapping_entry(struct xa_state *xas,
 
 retry:
 	xas_lock_irq(xas);
-	entry = get_unlocked_entry(xas);
+	entry = get_unlocked_entry(xas, order);
 
 	if (entry) {
+		if (dax_is_conflict(entry))
+			goto fallback;
 		if (!xa_is_value(entry)) {
 			xas_set_err(xas, EIO);
 			goto out_unlock;
 		}
 
-		if (size_flag & DAX_PMD) {
-			if (dax_is_pte_entry(entry)) {
-				put_unlocked_entry(xas, entry);
-				goto fallback;
-			}
-		} else { /* trying to grab a PTE entry */
+		if (order == 0) {
 			if (dax_is_pmd_entry(entry) &&
 					(dax_is_zero_entry(entry) ||
 					 dax_is_empty_entry(entry))) {
@@ -523,7 +533,11 @@ retry:
 	if (entry) {
 		dax_lock_entry(xas, entry);
 	} else {
-		entry = dax_make_entry(pfn_to_pfn_t(0), size_flag | DAX_EMPTY);
+		unsigned long flags = DAX_EMPTY;
+
+		if (order > 0)
+			flags |= DAX_PMD;
+		entry = dax_make_entry(pfn_to_pfn_t(0), flags);
 		dax_lock_entry(xas, entry);
 		if (xas_error(xas))
 			goto out_unlock;
@@ -594,7 +608,7 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
 		if (WARN_ON_ONCE(!xa_is_value(entry)))
 			continue;
 		if (unlikely(dax_is_locked(entry)))
-			entry = get_unlocked_entry(&xas);
+			entry = get_unlocked_entry(&xas, 0);
 		if (entry)
 			page = dax_busy_page(entry);
 		put_unlocked_entry(&xas, entry);
@@ -621,7 +635,7 @@ static int __dax_invalidate_entry(struct address_space *mapping,
 	void *entry;
 
 	xas_lock_irq(&xas);
-	entry = get_unlocked_entry(&xas);
+	entry = get_unlocked_entry(&xas, 0);
 	if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
 		goto out;
 	if (!trunc &&
@@ -848,7 +862,7 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
 	if (unlikely(dax_is_locked(entry))) {
 		void *old_entry = entry;
 
-		entry = get_unlocked_entry(xas);
+		entry = get_unlocked_entry(xas, 0);
 
 		/* Entry got punched out / reallocated? */
 		if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
@@ -1509,7 +1523,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 	 * entry is already in the array, for instance), it will return
 	 * VM_FAULT_FALLBACK.
 	 */
-	entry = grab_mapping_entry(&xas, mapping, DAX_PMD);
+	entry = grab_mapping_entry(&xas, mapping, PMD_ORDER);
 	if (xa_is_internal(entry)) {
 		result = xa_to_internal(entry);
 		goto fallback;
@@ -1658,11 +1672,10 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
 	vm_fault_t ret;
 
 	xas_lock_irq(&xas);
-	entry = get_unlocked_entry(&xas);
+	entry = get_unlocked_entry(&xas, order);
 	/* Did we race with someone splitting entry or so? */
-	if (!entry ||
-	    (order == 0 && !dax_is_pte_entry(entry)) ||
-	    (order == PMD_ORDER && !dax_is_pmd_entry(entry))) {
+	if (!entry || dax_is_conflict(entry) ||
+	    (order == 0 && !dax_is_pte_entry(entry))) {
 		put_unlocked_entry(&xas, entry);
 		xas_unlock_irq(&xas);
 		trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,