author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-18 13:58:52 -0400
---|---|---
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-18 13:58:52 -0400
commit | 0fe49f70a08d7d25acee3b066a88c654fea26121 |
tree | a6bcc3efdd88b7848d4262b08e8e62c83db2574a /fs/dax.c |
parent | f8c3500cd137867927bc080f4a6e02e0222dd1b8 |
parent | 23c84eb7837514e16d79ed6d849b13745e0ce688 |
Merge tag 'dax-for-5.3' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull dax updates from Dan Williams:
"The fruits of a bug hunt in the fsdax implementation with Willy and a
small feature update for device-dax:
- Fix a hang condition that started triggering after the Xarray
conversion of fsdax in the v4.20 kernel.
- Add a 'resource' (root-only physical base address) sysfs attribute
to device-dax instances to correlate memory-blocks onlined via the
kmem driver with a given device instance"
* tag 'dax-for-5.3' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
dax: Fix missed wakeup with PMD faults
device-dax: Add a 'resource' attribute
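The hang fix in the diff below works by teaching get_unlocked_entry() the order the caller is faulting at: if the entry already present in the mapping has a smaller order (e.g. a PTE entry found during a PMD fault), it returns a conflict sentinel (XA_RETRY_ENTRY, tested with the new dax_is_conflict()) right away, and grab_mapping_entry() falls back instead of waiting for that entry to unlock. The following is a minimal userspace sketch of that conflict-detection idea only, not kernel code; the names (get_entry, is_conflict, conflict_sentinel) and the PMD_ORDER value are illustrative assumptions.

#include <stdbool.h>
#include <stdio.h>

#define PTE_ORDER 0
#define PMD_ORDER 9	/* illustrative: x86-64 with 4K pages */

struct entry {
	unsigned int order;		/* order of the entry found in the mapping */
};

static const struct entry conflict_sentinel;	/* stand-in for XA_RETRY_ENTRY */

static bool is_conflict(const struct entry *e)
{
	return e == &conflict_sentinel;
}

/*
 * Look up @found on behalf of a fault of @order.  If the entry already
 * present is of a smaller order than requested, report a conflict
 * immediately instead of waiting on it.
 */
static const struct entry *get_entry(const struct entry *found, unsigned int order)
{
	if (found && found->order < order)
		return &conflict_sentinel;	/* caller falls back to a smaller fault */
	return found;				/* NULL means "allocate a new entry" */
}

int main(void)
{
	struct entry pte = { .order = PTE_ORDER };
	const struct entry *e;

	e = get_entry(&pte, PMD_ORDER);		/* PMD fault finds a PTE entry */
	printf("PMD fault over PTE entry: %s\n",
	       is_conflict(e) ? "fall back" : "proceed");

	e = get_entry(&pte, PTE_ORDER);		/* PTE fault over the same entry */
	printf("PTE fault over PTE entry: %s\n",
	       is_conflict(e) ? "fall back" : "proceed");
	return 0;
}

A larger fault that finds a smaller entry gets the sentinel and bails out, which mirrors the "goto fallback" path added to grab_mapping_entry() in the diff.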
Diffstat (limited to 'fs/dax.c')
-rw-r--r-- | fs/dax.c | 53 |
1 file changed, 33 insertions, 20 deletions
@@ -124,6 +124,15 @@ static int dax_is_empty_entry(void *entry)
 }
 
 /*
+ * true if the entry that was found is of a smaller order than the entry
+ * we were looking for
+ */
+static bool dax_is_conflict(void *entry)
+{
+	return entry == XA_RETRY_ENTRY;
+}
+
+/*
  * DAX page cache entry locking
  */
 struct exceptional_entry_key {
@@ -195,11 +204,13 @@ static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all)
  * Look up entry in page cache, wait for it to become unlocked if it
  * is a DAX entry and return it. The caller must subsequently call
  * put_unlocked_entry() if it did not lock the entry or dax_unlock_entry()
- * if it did.
+ * if it did. The entry returned may have a larger order than @order.
+ * If @order is larger than the order of the entry found in i_pages, this
+ * function returns a dax_is_conflict entry.
  *
  * Must be called with the i_pages lock held.
  */
-static void *get_unlocked_entry(struct xa_state *xas)
+static void *get_unlocked_entry(struct xa_state *xas, unsigned int order)
 {
 	void *entry;
 	struct wait_exceptional_entry_queue ewait;
@@ -210,6 +221,8 @@ static void *get_unlocked_entry(struct xa_state *xas)
 
 	for (;;) {
 		entry = xas_find_conflict(xas);
+		if (dax_entry_order(entry) < order)
+			return XA_RETRY_ENTRY;
 		if (!entry || WARN_ON_ONCE(!xa_is_value(entry)) ||
 				!dax_is_locked(entry))
 			return entry;
@@ -254,7 +267,7 @@ static void wait_entry_unlocked(struct xa_state *xas, void *entry)
 static void put_unlocked_entry(struct xa_state *xas, void *entry)
 {
 	/* If we were the only waiter woken, wake the next one */
-	if (entry)
+	if (entry && !dax_is_conflict(entry))
 		dax_wake_entry(xas, entry, false);
 }
 
@@ -461,7 +474,7 @@ void dax_unlock_page(struct page *page, dax_entry_t cookie)
  * overlap with xarray value entries.
  */
 static void *grab_mapping_entry(struct xa_state *xas,
-		struct address_space *mapping, unsigned long size_flag)
+		struct address_space *mapping, unsigned int order)
 {
 	unsigned long index = xas->xa_index;
 	bool pmd_downgrade = false; /* splitting PMD entry into PTE entries? */
@@ -469,20 +482,17 @@ static void *grab_mapping_entry(struct xa_state *xas,
 
 retry:
 	xas_lock_irq(xas);
-	entry = get_unlocked_entry(xas);
+	entry = get_unlocked_entry(xas, order);
 
 	if (entry) {
+		if (dax_is_conflict(entry))
+			goto fallback;
 		if (!xa_is_value(entry)) {
 			xas_set_err(xas, EIO);
 			goto out_unlock;
 		}
 
-		if (size_flag & DAX_PMD) {
-			if (dax_is_pte_entry(entry)) {
-				put_unlocked_entry(xas, entry);
-				goto fallback;
-			}
-		} else { /* trying to grab a PTE entry */
+		if (order == 0) {
 			if (dax_is_pmd_entry(entry) &&
 					(dax_is_zero_entry(entry) ||
 					 dax_is_empty_entry(entry))) {
@@ -523,7 +533,11 @@ retry:
 	if (entry) {
 		dax_lock_entry(xas, entry);
 	} else {
-		entry = dax_make_entry(pfn_to_pfn_t(0), size_flag | DAX_EMPTY);
+		unsigned long flags = DAX_EMPTY;
+
+		if (order > 0)
+			flags |= DAX_PMD;
+		entry = dax_make_entry(pfn_to_pfn_t(0), flags);
 		dax_lock_entry(xas, entry);
 		if (xas_error(xas))
 			goto out_unlock;
@@ -594,7 +608,7 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
 		if (WARN_ON_ONCE(!xa_is_value(entry)))
 			continue;
 		if (unlikely(dax_is_locked(entry)))
-			entry = get_unlocked_entry(&xas);
+			entry = get_unlocked_entry(&xas, 0);
 		if (entry)
 			page = dax_busy_page(entry);
 		put_unlocked_entry(&xas, entry);
@@ -621,7 +635,7 @@ static int __dax_invalidate_entry(struct address_space *mapping,
 	void *entry;
 
 	xas_lock_irq(&xas);
-	entry = get_unlocked_entry(&xas);
+	entry = get_unlocked_entry(&xas, 0);
 	if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
 		goto out;
 	if (!trunc &&
@@ -848,7 +862,7 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
 	if (unlikely(dax_is_locked(entry))) {
 		void *old_entry = entry;
 
-		entry = get_unlocked_entry(xas);
+		entry = get_unlocked_entry(xas, 0);
 
 		/* Entry got punched out / reallocated? */
 		if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
@@ -1509,7 +1523,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 	 * entry is already in the array, for instance), it will return
 	 * VM_FAULT_FALLBACK.
 	 */
-	entry = grab_mapping_entry(&xas, mapping, DAX_PMD);
+	entry = grab_mapping_entry(&xas, mapping, PMD_ORDER);
 	if (xa_is_internal(entry)) {
 		result = xa_to_internal(entry);
 		goto fallback;
@@ -1658,11 +1672,10 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
 	vm_fault_t ret;
 
 	xas_lock_irq(&xas);
-	entry = get_unlocked_entry(&xas);
+	entry = get_unlocked_entry(&xas, order);
 	/* Did we race with someone splitting entry or so? */
-	if (!entry ||
-	    (order == 0 && !dax_is_pte_entry(entry)) ||
-	    (order == PMD_ORDER && !dax_is_pmd_entry(entry))) {
+	if (!entry || dax_is_conflict(entry) ||
+	    (order == 0 && !dax_is_pte_entry(entry))) {
 		put_unlocked_entry(&xas, entry);
 		xas_unlock_irq(&xas);
 		trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,