Diffstat (limited to 'fs/dax.c')
-rw-r--r--	fs/dax.c	89
1 file changed, 60 insertions(+), 29 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -98,12 +98,6 @@ static void *dax_make_entry(pfn_t pfn, unsigned long flags)
 	return xa_mk_value(flags | (pfn_t_to_pfn(pfn) << DAX_SHIFT));
 }
 
-static void *dax_make_page_entry(struct page *page)
-{
-	pfn_t pfn = page_to_pfn_t(page);
-	return dax_make_entry(pfn, PageHead(page) ? DAX_PMD : 0);
-}
-
 static bool dax_is_locked(void *entry)
 {
 	return xa_to_value(entry) & DAX_LOCKED;
@@ -116,12 +110,12 @@ static unsigned int dax_entry_order(void *entry)
 	return 0;
 }
 
-static int dax_is_pmd_entry(void *entry)
+static unsigned long dax_is_pmd_entry(void *entry)
 {
 	return xa_to_value(entry) & DAX_PMD;
 }
 
-static int dax_is_pte_entry(void *entry)
+static bool dax_is_pte_entry(void *entry)
 {
 	return !(xa_to_value(entry) & DAX_PMD);
 }
@@ -222,9 +216,8 @@ static void *get_unlocked_entry(struct xa_state *xas)
 	ewait.wait.func = wake_exceptional_entry_func;
 
 	for (;;) {
-		entry = xas_load(xas);
-		if (!entry || xa_is_internal(entry) ||
-				WARN_ON_ONCE(!xa_is_value(entry)) ||
+		entry = xas_find_conflict(xas);
+		if (!entry || WARN_ON_ONCE(!xa_is_value(entry)) ||
 				!dax_is_locked(entry))
 			return entry;
 
@@ -239,6 +232,34 @@ static void *get_unlocked_entry(struct xa_state *xas)
 	}
 }
 
+/*
+ * The only thing keeping the address space around is the i_pages lock
+ * (it's cycled in clear_inode() after removing the entries from i_pages)
+ * After we call xas_unlock_irq(), we cannot touch xas->xa.
+ */
+static void wait_entry_unlocked(struct xa_state *xas, void *entry)
+{
+	struct wait_exceptional_entry_queue ewait;
+	wait_queue_head_t *wq;
+
+	init_wait(&ewait.wait);
+	ewait.wait.func = wake_exceptional_entry_func;
+
+	wq = dax_entry_waitqueue(xas, entry, &ewait.key);
+	prepare_to_wait_exclusive(wq, &ewait.wait, TASK_UNINTERRUPTIBLE);
+	xas_unlock_irq(xas);
+	schedule();
+	finish_wait(wq, &ewait.wait);
+
+	/*
+	 * Entry lock waits are exclusive. Wake up the next waiter since
+	 * we aren't sure we will acquire the entry lock and thus wake
+	 * the next waiter up on unlock.
+	 */
+	if (waitqueue_active(wq))
+		__wake_up(wq, TASK_NORMAL, 1, &ewait.key);
+}
+
 static void put_unlocked_entry(struct xa_state *xas, void *entry)
 {
 	/* If we were the only waiter woken, wake the next one */
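Note that wait_entry_unlocked() has a narrower contract than get_unlocked_entry(): it is entered with the xa_state locked, drops that lock, and returns nothing, so the caller has to abandon the current lookup and start over. A minimal sketch of the intended calling pattern, assuming the caller already holds xas_lock_irq() (the names match this file; the real user is in the dax_lock_page() hunks further down):

	xas_lock_irq(&xas);
	entry = xas_load(&xas);
	if (dax_is_locked(entry)) {
		/* Drops the xas lock; xas->xa may be freed once it is released. */
		wait_entry_unlocked(&xas, entry);
		/* Re-validate the mapping and restart the lookup from the top. */
	}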
@@ -255,6 +276,7 @@ static void dax_unlock_entry(struct xa_state *xas, void *entry)
 {
 	void *old;
 
+	BUG_ON(dax_is_locked(entry));
 	xas_reset(xas);
 	xas_lock_irq(xas);
 	old = xas_store(xas, entry);
@@ -352,16 +374,27 @@ static struct page *dax_busy_page(void *entry)
 	return NULL;
 }
 
-bool dax_lock_mapping_entry(struct page *page)
+/*
+ * dax_lock_mapping_entry - Lock the DAX entry corresponding to a page
+ * @page: The page whose entry we want to lock
+ *
+ * Context: Process context.
+ * Return: A cookie to pass to dax_unlock_page() or 0 if the entry could
+ * not be locked.
+ */
+dax_entry_t dax_lock_page(struct page *page)
 {
 	XA_STATE(xas, NULL, 0);
 	void *entry;
 
+	/* Ensure page->mapping isn't freed while we look at it */
+	rcu_read_lock();
 	for (;;) {
 		struct address_space *mapping = READ_ONCE(page->mapping);
 
-		if (!dax_mapping(mapping))
-			return false;
+		entry = NULL;
+		if (!mapping || !dax_mapping(mapping))
+			break;
 
 		/*
 		 * In the device-dax case there's no need to lock, a
@@ -370,8 +403,9 @@ bool dax_lock_mapping_entry(struct page *page)
 		 * otherwise we would not have a valid pfn_to_page()
 		 * translation.
 		 */
+		entry = (void *)~0UL;
 		if (S_ISCHR(mapping->host->i_mode))
-			return true;
+			break;
 
 		xas.xa = &mapping->i_pages;
 		xas_lock_irq(&xas);
@@ -382,20 +416,20 @@ bool dax_lock_mapping_entry(struct page *page)
 		xas_set(&xas, page->index);
 		entry = xas_load(&xas);
 		if (dax_is_locked(entry)) {
-			entry = get_unlocked_entry(&xas);
-			/* Did the page move while we slept? */
-			if (dax_to_pfn(entry) != page_to_pfn(page)) {
-				xas_unlock_irq(&xas);
-				continue;
-			}
+			rcu_read_unlock();
+			wait_entry_unlocked(&xas, entry);
+			rcu_read_lock();
+			continue;
 		}
 		dax_lock_entry(&xas, entry);
 		xas_unlock_irq(&xas);
-		return true;
+		break;
 	}
+	rcu_read_unlock();
+	return (dax_entry_t)entry;
 }
 
-void dax_unlock_mapping_entry(struct page *page)
+void dax_unlock_page(struct page *page, dax_entry_t cookie)
 {
 	struct address_space *mapping = page->mapping;
 	XA_STATE(xas, &mapping->i_pages, page->index);
@@ -403,7 +437,7 @@ void dax_unlock_mapping_entry(struct page *page)
 	if (S_ISCHR(mapping->host->i_mode))
 		return;
 
-	dax_unlock_entry(&xas, dax_make_page_entry(page));
+	dax_unlock_entry(&xas, (void *)cookie);
 }
 
 /*
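The hunks above change the lock/unlock pair from returning a bool to handing the caller an opaque cookie (the previous entry value, which dax_unlock_page() stores back). A short sketch of how a caller might use the reworked interface; handle_dax_page() is a hypothetical helper made up for illustration, while dax_entry_t, dax_lock_page() and dax_unlock_page() are the names introduced by this diff:

	/* Hypothetical caller, not part of this diff. */
	static int handle_dax_page(struct page *page)
	{
		dax_entry_t cookie;

		cookie = dax_lock_page(page);
		if (!cookie)
			return -EBUSY;	/* the entry could not be locked */

		/* ... work on the page while its DAX entry is held locked ... */

		/* The same cookie must be handed back on unlock. */
		dax_unlock_page(page, cookie);
		return 0;
	}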
@@ -445,11 +479,9 @@ static void *grab_mapping_entry(struct xa_state *xas,
 retry:
 	xas_lock_irq(xas);
 	entry = get_unlocked_entry(xas);
-	if (xa_is_internal(entry))
-		goto fallback;
 
 	if (entry) {
-		if (WARN_ON_ONCE(!xa_is_value(entry))) {
+		if (!xa_is_value(entry)) {
 			xas_set_err(xas, EIO);
 			goto out_unlock;
 		}
@@ -1628,8 +1660,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
 	/* Did we race with someone splitting entry or so? */
 	if (!entry ||
 	    (order == 0 && !dax_is_pte_entry(entry)) ||
-	    (order == PMD_ORDER && (xa_is_internal(entry) ||
-			!dax_is_pmd_entry(entry)))) {
+	    (order == PMD_ORDER && !dax_is_pmd_entry(entry))) {
 		put_unlocked_entry(&xas, entry);
 		xas_unlock_irq(&xas);
 		trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
