 include/linux/mm.h |  12
 mm/mlock.c         | 110
 2 files changed, 85 insertions(+), 37 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index dce24569f8fc..03f84b8d7359 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -643,12 +643,12 @@ static inline enum zone_type page_zonenum(const struct page *page)
 #endif
 
 /*
- * The identification function is only used by the buddy allocator for
- * determining if two pages could be buddies. We are not really
- * identifying a zone since we could be using a the section number
- * id if we have not node id available in page flags.
- * We guarantee only that it will return the same value for two
- * combinable pages in a zone.
+ * The identification function is mainly used by the buddy allocator for
+ * determining if two pages could be buddies. We are not really identifying
+ * the zone since we could be using the section number id if we do not have
+ * node id available in page flags.
+ * We only guarantee that it will return the same value for two combinable
+ * pages in a zone.
  */
 static inline int page_zone_id(struct page *page)
 {
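The rewritten comment is worth dwelling on: page_zone_id() does not name a zone, it only returns a token that compares equal for two pages when their node+zone (or section) bits in page->flags match, which is exactly the property the munlock batching below relies on. A minimal standalone model of that idea, with made-up bit widths (the real kernel derives the shifts and mask from a config-dependent layout of page->flags, in the high bits rather than the low ones):

#include <stdio.h>

/* Hypothetical layout; the kernel computes these from its config. */
#define ZONES_SHIFT  2				/* bits for the zone */
#define NODES_SHIFT  6				/* bits for the node */
#define ZONEID_MASK  ((1UL << (ZONES_SHIFT + NODES_SHIFT)) - 1)

/* Pack node and zone into one flags word, zone in the low bits. */
static unsigned long pack_flags(unsigned long node, unsigned long zone)
{
	return node << ZONES_SHIFT | zone;
}

/* Model of page_zone_id(): the combined node+zone field, nothing more. */
static int zone_id(unsigned long flags)
{
	return flags & ZONEID_MASK;
}

int main(void)
{
	unsigned long a = pack_flags(0, 1);	/* node 0, zone 1 */
	unsigned long b = pack_flags(0, 1);	/* same node and zone */
	unsigned long c = pack_flags(1, 1);	/* same zone, other node */

	/* Only a and b are "combinable": their ids compare equal. */
	printf("a/b: %s\n", zone_id(a) == zone_id(b) ? "same" : "differ");
	printf("a/c: %s\n", zone_id(a) == zone_id(c) ? "same" : "differ");
	return 0;
}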
diff --git a/mm/mlock.c b/mm/mlock.c
index 19a934dce5d6..d63802663242 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -280,8 +280,7 @@ static void __putback_lru_fast(struct pagevec *pvec, int pgrescued)
  * The second phase finishes the munlock only for pages where isolation
  * succeeded.
  *
- * Note that pvec is modified during the process. Before returning
- * pagevec_reinit() is called on it.
+ * Note that the pagevec may be modified during the process.
  */
 static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
 {
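Everything below leans on the semantics of the pagevec batching container: a small fixed-size array of page pointers (PAGEVEC_SIZE, 14 entries in kernels of this vintage) where pagevec_add() returns the number of free slots left, so a return value of 0 means "batch full, flush it". A simplified userspace model of that contract (the kernel's pagevec_init() also takes a "cold" flag, and its pagevec_add() guards against overfill; both are elided here):

#include <stdio.h>

#define PAGEVEC_SIZE 14		/* capacity in 3.12-era kernels */

struct page;			/* opaque for this sketch */

/* Simplified model of the kernel's fixed-size page batch. */
struct pagevec {
	unsigned long nr;
	struct page *pages[PAGEVEC_SIZE];
};

static void pagevec_init(struct pagevec *pvec)
{
	pvec->nr = 0;
}

static unsigned long pagevec_count(const struct pagevec *pvec)
{
	return pvec->nr;
}

/*
 * Add a page and return the free slots remaining; 0 tells the caller
 * the batch is full and must be processed before adding more.
 */
static unsigned long pagevec_add(struct pagevec *pvec, struct page *page)
{
	pvec->pages[pvec->nr++] = page;
	return PAGEVEC_SIZE - pvec->nr;
}

int main(void)
{
	struct pagevec pvec;
	int i;

	pagevec_init(&pvec);
	for (i = 0; i < 20; i++) {
		if (pagevec_add(&pvec, NULL) == 0) {
			printf("flush a full batch of %lu\n", pagevec_count(&pvec));
			pagevec_init(&pvec);	/* like pagevec_reinit() */
		}
	}
	if (pagevec_count(&pvec))	/* leftover partial batch */
		printf("flush final %lu\n", pagevec_count(&pvec));
	return 0;
}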
@@ -356,8 +355,60 @@ skip_munlock:
 	 */
 	if (pagevec_count(&pvec_putback))
 		__putback_lru_fast(&pvec_putback, pgrescued);
+}
+
+/*
+ * Fill up pagevec for __munlock_pagevec using pte walk
+ *
+ * The function expects that the struct page corresponding to @start address is
+ * a non-THP page already pinned and in the @pvec, and that it belongs to @zone.
+ *
+ * The rest of @pvec is filled by subsequent pages within the same pmd and same
+ * zone, as long as the ptes are present and vm_normal_page() succeeds. These
+ * pages also get pinned.
+ *
+ * Returns the address of the next page that should be scanned. This equals
+ * @start + PAGE_SIZE when no page could be added by the pte walk.
+ */
+static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
+		struct vm_area_struct *vma, int zoneid, unsigned long start,
+		unsigned long end)
+{
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	/*
+	 * Initialize the pte walk at the already pinned page, where we are
+	 * sure that a pte exists.
+	 */
+	pte = get_locked_pte(vma->vm_mm, start, &ptl);
+	end = min(end, pmd_addr_end(start, end));
+
+	/* The page next to the pinned page is the first we will try to get */
+	start += PAGE_SIZE;
+	while (start < end) {
+		struct page *page = NULL;
+		pte++;
+		if (pte_present(*pte))
+			page = vm_normal_page(vma, start, *pte);
+		/*
+		 * Break if the page could not be obtained or if its
+		 * node+zone id does not match the first page's.
+		 */
+		if (!page || page_zone_id(page) != zoneid)
+			break;
 
-	pagevec_reinit(pvec);
+		get_page(page);
+		/*
+		 * Increase the address that will be returned *before* the
+		 * eventual break due to pvec becoming full by adding the page
+		 */
+		start += PAGE_SIZE;
+		if (pagevec_add(pvec, page) == 0)
+			break;
+	}
+	pte_unmap_unlock(pte, ptl);
+	return start;
 }
 
 /*
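One subtlety in __munlock_pagevec_fill() deserves a note: @start is advanced *before* pagevec_add() can report the pagevec full, so the returned address always points just past the pages that were actually taken, and the caller neither re-scans nor skips a page. The toy program below reproduces that contract outside the kernel; all names are hypothetical, with plain ints standing in for ptes (negative meaning "not present") and zone ids:

#include <stdio.h>

#define BATCH_CAP 14

static unsigned long fill_batch(int *batch, int *nr, const int *pte,
				int zoneid, unsigned long start,
				unsigned long end)
{
	/* The slot at @start is already in the batch; begin at the next one. */
	start += 1;
	while (start < end) {
		pte++;
		/* Stop on a hole or on a page from another node+zone. */
		if (*pte < 0 || *pte != zoneid)
			break;
		/*
		 * Advance the returned position *before* the batch may
		 * fill up, so the caller resumes exactly past the pages
		 * that were actually taken.
		 */
		start += 1;
		batch[(*nr)++] = *pte;
		if (*nr == BATCH_CAP)
			break;
	}
	return start;
}

int main(void)
{
	int pte[8] = { 1, 1, 1, -1, 1, 1, 1, 1 };	/* hole at index 3 */
	int batch[BATCH_CAP], nr = 1;			/* slot 0 pre-pinned */

	batch[0] = pte[0];
	unsigned long next = fill_batch(batch, &nr, pte, 1, 0, 8);
	printf("batch of %d, resume at %lu\n", nr, next);	/* 3, resume at 3 */
	return 0;
}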
@@ -381,17 +432,16 @@ skip_munlock:
 void munlock_vma_pages_range(struct vm_area_struct *vma,
 			     unsigned long start, unsigned long end)
 {
-	struct pagevec pvec;
-	struct zone *zone = NULL;
-
-	pagevec_init(&pvec, 0);
 	vma->vm_flags &= ~VM_LOCKED;
 
 	while (start < end) {
-		struct page *page;
+		struct page *page = NULL;
 		unsigned int page_mask, page_increm;
-		struct zone *pagezone;
+		struct pagevec pvec;
+		struct zone *zone;
+		int zoneid;
 
+		pagevec_init(&pvec, 0);
 		/*
 		 * Although FOLL_DUMP is intended for get_dump_page(),
 		 * it just so happens that its special treatment of the
@@ -400,22 +450,10 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
 		 * has sneaked into the range, we won't oops here: great).
 		 */
 		page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP,
 				&page_mask);
+
 		if (page && !IS_ERR(page)) {
-			pagezone = page_zone(page);
-			/* The whole pagevec must be in the same zone */
-			if (pagezone != zone) {
-				if (pagevec_count(&pvec))
-					__munlock_pagevec(&pvec, zone);
-				zone = pagezone;
-			}
 			if (PageTransHuge(page)) {
-				/*
-				 * THP pages are not handled by pagevec due
-				 * to their possible split (see below).
-				 */
-				if (pagevec_count(&pvec))
-					__munlock_pagevec(&pvec, zone);
 				lock_page(page);
 				/*
 				 * Any THP page found by follow_page_mask() may
@@ -428,21 +466,31 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
 				put_page(page); /* follow_page_mask() */
 			} else {
 				/*
-				 * Non-huge pages are handled in batches
-				 * via pagevec. The pin from
-				 * follow_page_mask() prevents them from
-				 * collapsing by THP.
+				 * Non-huge pages are handled in batches via
+				 * pagevec. The pin from follow_page_mask()
+				 * prevents them from being collapsed into THP.
+				 */
+				pagevec_add(&pvec, page);
+				zone = page_zone(page);
+				zoneid = page_zone_id(page);
+
+				/*
+				 * Try to fill the rest of the pagevec using a
+				 * fast pte walk. This will also update start
+				 * to the next page to process. Then munlock
+				 * the pagevec.
 				 */
-				if (pagevec_add(&pvec, page) == 0)
-					__munlock_pagevec(&pvec, zone);
+				start = __munlock_pagevec_fill(&pvec, vma,
+						zoneid, start, end);
+				__munlock_pagevec(&pvec, zone);
+				goto next;
 			}
 		}
 		page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
 		start += page_increm * PAGE_SIZE;
+next:
 		cond_resched();
 	}
-	if (pagevec_count(&pvec))
-		__munlock_pagevec(&pvec, zone);
 }
 
 /*
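Finally, note the loop tail that the pagevec path now bypasses via goto next. For the paths that still reach it, page_increm is derived from the page_mask reported by follow_page_mask(): 0 for a base page (so the increment is 1), and the number of tail pages for a THP, in which case 1 + (~(start >> PAGE_SHIFT) & page_mask) advances start exactly to the next huge-page boundary even when start began partway into the huge page. A small self-contained check of that arithmetic (the constants assume 4KB base pages and 2MB huge pages, i.e. a mask of 511; they are illustrative, not taken from this patch):

#include <stdio.h>

#define PAGE_SHIFT 12			/* 4KB base pages */
#define HPAGE_NR   512			/* 2MB THP = 512 base pages */

int main(void)
{
	unsigned long page_mask = HPAGE_NR - 1;	/* 511, as for a THP */
	/* start 5 base pages past a 2MB boundary, as munlock might see */
	unsigned long start = (2UL << 20) + 5 * (1UL << PAGE_SHIFT);

	/* pages remaining up to the end of the current huge page */
	unsigned long page_increm =
		1 + (~(start >> PAGE_SHIFT) & page_mask);

	start += page_increm * (1UL << PAGE_SHIFT);

	/* prints: increm=507, next start=4194304 (aligned: yes) */
	printf("increm=%lu, next start=%lu (aligned: %s)\n",
	       page_increm, start,
	       (start & ((HPAGE_NR << PAGE_SHIFT) - 1)) == 0 ? "yes" : "no");
	return 0;
}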
