Diffstat (limited to 'fs/dax.c')
-rw-r--r--	fs/dax.c	89
1 file changed, 60 insertions(+), 29 deletions(-)
@@ -98,12 +98,6 @@ static void *dax_make_entry(pfn_t pfn, unsigned long flags)
 	return xa_mk_value(flags | (pfn_t_to_pfn(pfn) << DAX_SHIFT));
 }
 
-static void *dax_make_page_entry(struct page *page)
-{
-	pfn_t pfn = page_to_pfn_t(page);
-	return dax_make_entry(pfn, PageHead(page) ? DAX_PMD : 0);
-}
-
 static bool dax_is_locked(void *entry)
 {
 	return xa_to_value(entry) & DAX_LOCKED;
@@ -116,12 +110,12 @@ static unsigned int dax_entry_order(void *entry)
 	return 0;
 }
 
-static int dax_is_pmd_entry(void *entry)
+static unsigned long dax_is_pmd_entry(void *entry)
 {
 	return xa_to_value(entry) & DAX_PMD;
 }
 
-static int dax_is_pte_entry(void *entry)
+static bool dax_is_pte_entry(void *entry)
 {
 	return !(xa_to_value(entry) & DAX_PMD);
 }
@@ -222,9 +216,8 @@ static void *get_unlocked_entry(struct xa_state *xas)
 	ewait.wait.func = wake_exceptional_entry_func;
 
 	for (;;) {
-		entry = xas_load(xas);
-		if (!entry || xa_is_internal(entry) ||
-				WARN_ON_ONCE(!xa_is_value(entry)) ||
+		entry = xas_find_conflict(xas);
+		if (!entry || WARN_ON_ONCE(!xa_is_value(entry)) ||
 				!dax_is_locked(entry))
 			return entry;
 
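Switching get_unlocked_entry() from xas_load() to xas_find_conflict() is what lets the xa_is_internal() checks disappear here and in the later hunks: with multi-order (PMD) entries, a lookup at a PTE-granularity index can land on an internal sibling entry, and xas_find_conflict() walks past that to the real value entry. A minimal sketch of the calling convention, using only names that appear in this file (the surrounding locking is assumed, not shown in this hunk):

	xas_lock_irq(&xas);
	entry = get_unlocked_entry(&xas);	/* sleeps until DAX_LOCKED clears */
	if (entry)
		dax_lock_entry(&xas, entry);	/* now we own the entry lock */
	xas_unlock_irq(&xas);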
@@ -239,6 +232,34 @@ static void *get_unlocked_entry(struct xa_state *xas)
 	}
 }
 
+/*
+ * The only thing keeping the address space around is the i_pages lock
+ * (it's cycled in clear_inode() after removing the entries from i_pages)
+ * After we call xas_unlock_irq(), we cannot touch xas->xa.
+ */
+static void wait_entry_unlocked(struct xa_state *xas, void *entry)
+{
+	struct wait_exceptional_entry_queue ewait;
+	wait_queue_head_t *wq;
+
+	init_wait(&ewait.wait);
+	ewait.wait.func = wake_exceptional_entry_func;
+
+	wq = dax_entry_waitqueue(xas, entry, &ewait.key);
+	prepare_to_wait_exclusive(wq, &ewait.wait, TASK_UNINTERRUPTIBLE);
+	xas_unlock_irq(xas);
+	schedule();
+	finish_wait(wq, &ewait.wait);
+
+	/*
+	 * Entry lock waits are exclusive. Wake up the next waiter since
+	 * we aren't sure we will acquire the entry lock and thus wake
+	 * the next waiter up on unlock.
+	 */
+	if (waitqueue_active(wq))
+		__wake_up(wq, TASK_NORMAL, 1, &ewait.key);
+}
+
 static void put_unlocked_entry(struct xa_state *xas, void *entry)
 {
 	/* If we were the only waiter woken, wake the next one */
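wait_entry_unlocked() differs from get_unlocked_entry() in one crucial way: it returns with the xarray lock dropped and never retakes it, because once xas_unlock_irq() runs, the address_space backing xas->xa may already be gone. A sketch of the implied caller contract (an assumption drawn from the comment above, not a documented API rule):

	xas_lock_irq(&xas);
	entry = xas_load(&xas);
	if (dax_is_locked(entry)) {
		wait_entry_unlocked(&xas, entry);	/* drops the lock and sleeps */
		/* xas->xa may be freed now: re-do the lookup from scratch */
	}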
@@ -255,6 +276,7 @@ static void dax_unlock_entry(struct xa_state *xas, void *entry)
 {
 	void *old;
 
+	BUG_ON(dax_is_locked(entry));
 	xas_reset(xas);
 	xas_lock_irq(xas);
 	old = xas_store(xas, entry);
@@ -352,16 +374,27 @@ static struct page *dax_busy_page(void *entry)
 	return NULL;
 }
 
-bool dax_lock_mapping_entry(struct page *page)
+/*
+ * dax_lock_page - Lock the DAX entry corresponding to a page
+ * @page: The page whose entry we want to lock
+ *
+ * Context: Process context.
+ * Return: A cookie to pass to dax_unlock_page() or 0 if the entry could
+ * not be locked.
+ */
+dax_entry_t dax_lock_page(struct page *page)
 {
 	XA_STATE(xas, NULL, 0);
 	void *entry;
 
+	/* Ensure page->mapping isn't freed while we look at it */
+	rcu_read_lock();
 	for (;;) {
 		struct address_space *mapping = READ_ONCE(page->mapping);
 
-		if (!dax_mapping(mapping))
-			return false;
+		entry = NULL;
+		if (!mapping || !dax_mapping(mapping))
+			break;
 
 		/*
 		 * In the device-dax case there's no need to lock, a
@@ -370,8 +403,9 @@ bool dax_lock_mapping_entry(struct page *page)
 		 * otherwise we would not have a valid pfn_to_page()
 		 * translation.
 		 */
+		entry = (void *)~0UL;
 		if (S_ISCHR(mapping->host->i_mode))
-			return true;
+			break;
 
 		xas.xa = &mapping->i_pages;
 		xas_lock_irq(&xas);
@@ -382,20 +416,20 @@ bool dax_lock_mapping_entry(struct page *page)
 		xas_set(&xas, page->index);
 		entry = xas_load(&xas);
 		if (dax_is_locked(entry)) {
-			entry = get_unlocked_entry(&xas);
-			/* Did the page move while we slept? */
-			if (dax_to_pfn(entry) != page_to_pfn(page)) {
-				xas_unlock_irq(&xas);
-				continue;
-			}
+			rcu_read_unlock();
+			wait_entry_unlocked(&xas, entry);
+			rcu_read_lock();
+			continue;
 		}
 		dax_lock_entry(&xas, entry);
 		xas_unlock_irq(&xas);
-		return true;
+		break;
 	}
+	rcu_read_unlock();
+	return (dax_entry_t)entry;
 }
 
-void dax_unlock_mapping_entry(struct page *page)
+void dax_unlock_page(struct page *page, dax_entry_t cookie)
 {
 	struct address_space *mapping = page->mapping;
 	XA_STATE(xas, &mapping->i_pages, page->index);
@@ -403,7 +437,7 @@ void dax_unlock_mapping_entry(struct page *page)
 	if (S_ISCHR(mapping->host->i_mode))
 		return;
 
-	dax_unlock_entry(&xas, dax_make_page_entry(page));
+	dax_unlock_entry(&xas, (void *)cookie);
 }
 
 /*
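The cookie returned by dax_lock_page() is simply the locked entry value itself (or ~0UL in the device-dax case), which is why dax_unlock_page() can cast it straight back to a void * instead of reconstructing the entry with the now-removed dax_make_page_entry(). A hedged usage sketch for a caller such as the memory-failure path; the error handling is illustrative, not taken from this patch:

	dax_entry_t cookie;

	cookie = dax_lock_page(page);
	if (!cookie)
		return -EBUSY;	/* entry was never locked; nothing to undo */
	/* ... inspect the page while its DAX entry is held locked ... */
	dax_unlock_page(page, cookie);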
@@ -445,11 +479,9 @@ static void *grab_mapping_entry(struct xa_state *xas,
 retry:
 	xas_lock_irq(xas);
 	entry = get_unlocked_entry(xas);
-	if (xa_is_internal(entry))
-		goto fallback;
 
 	if (entry) {
-		if (WARN_ON_ONCE(!xa_is_value(entry))) {
+		if (!xa_is_value(entry)) {
 			xas_set_err(xas, EIO);
 			goto out_unlock;
 		}
@@ -1628,8 +1660,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
 	/* Did we race with someone splitting entry or so? */
 	if (!entry ||
 	    (order == 0 && !dax_is_pte_entry(entry)) ||
-	    (order == PMD_ORDER && (xa_is_internal(entry) ||
-				    !dax_is_pmd_entry(entry)))) {
+	    (order == PMD_ORDER && !dax_is_pmd_entry(entry))) {
 		put_unlocked_entry(&xas, entry);
 		xas_unlock_irq(&xas);
 		trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,